Skip to content

Commit c6b1250

Browse files
authored
add diagnostics to ai search route (#55008)
1 parent 5a63615 commit c6b1250

File tree

1 file changed

+33
-13
lines changed

1 file changed

+33
-13
lines changed

src/search/lib/ai-search-proxy.ts

Lines changed: 33 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,14 @@
11
import { Request, Response } from 'express'
2+
import statsd from '@/observability/lib/statsd'
23
import got from 'got'
34
import { getHmacWithEpoch } from '@/search/lib/helpers/get-cse-copilot-auth'
4-
import { getCSECopilotSource } from '#src/search/lib/helpers/cse-copilot-docs-versions.js'
5+
import { getCSECopilotSource } from '@/search/lib/helpers/cse-copilot-docs-versions'
56

67
const memoryCache = new Map<string, Buffer>()
78

89
export const aiSearchProxy = async (req: Request, res: Response) => {
910
const { query, version, language } = req.body
11+
1012
const errors = []
1113

1214
// Validate request body
@@ -34,13 +36,25 @@ export const aiSearchProxy = async (req: Request, res: Response) => {
3436
return
3537
}
3638

39+
const diagnosticTags = [
40+
`version:${version}`.slice(0, 200),
41+
`language:${language}`.slice(0, 200),
42+
`queryLength:${query.length}`.slice(0, 200),
43+
]
44+
statsd.increment('ai-search.call', 1, diagnosticTags)
45+
46+
// TODO: Caching here may cause an issue if the cache grows too large. Additionally, the cache will be inconsistent across pods
3747
const cacheKey = `${query}:${version}:${language}`
3848
if (memoryCache.has(cacheKey)) {
49+
statsd.increment('ai-search.cache_hit', 1, diagnosticTags)
3950
res.setHeader('Content-Type', 'application/x-ndjson')
4051
res.send(memoryCache.get(cacheKey))
4152
return
4253
}
4354

55+
const startTime = Date.now()
56+
let totalChars = 0
57+
4458
const body = {
4559
chat_context: 'docs',
4660
docs_source: docsSource,
@@ -57,22 +71,19 @@ export const aiSearchProxy = async (req: Request, res: Response) => {
5771
},
5872
})
5973

60-
const chunks: Buffer[] = []
61-
stream.on('data', (chunk) => {
62-
chunks.push(chunk)
74+
// Listen for data events to count characters
75+
stream.on('data', (chunk: Buffer | string) => {
76+
// Ensure we have a string for proper character count
77+
const dataStr = typeof chunk === 'string' ? chunk : chunk.toString()
78+
totalChars += dataStr.length
6379
})
6480

6581
// Handle the upstream response before piping
6682
stream.on('response', (upstreamResponse) => {
67-
// When cse-copilot returns a 204, it means the backend received the request
68-
// but was unable to answer the question. So we return a 400 to the client to be handled.
69-
if (upstreamResponse.statusCode === 204) {
70-
return res
71-
.status(400)
72-
.json({ errors: [{ message: 'Sorry I am unable to answer this question.' }] })
73-
} else if (upstreamResponse.statusCode !== 200) {
83+
if (upstreamResponse.statusCode !== 200) {
7484
const errorMessage = `Upstream server responded with status code ${upstreamResponse.statusCode}`
7585
console.error(errorMessage)
86+
statsd.increment('ai-search.stream_response_error', 1, diagnosticTags)
7687
res.status(500).json({ errors: [{ message: errorMessage }] })
7788
stream.destroy()
7889
} else {
@@ -95,6 +106,8 @@ export const aiSearchProxy = async (req: Request, res: Response) => {
95106
.json({ errors: [{ message: 'Sorry I am unable to answer this question.' }] })
96107
}
97108

109+
statsd.increment('ai-search.stream_error', 1, diagnosticTags)
110+
98111
if (!res.headersSent) {
99112
res.status(500).json({ errors: [{ message: 'Internal server error' }] })
100113
} else {
@@ -106,12 +119,19 @@ export const aiSearchProxy = async (req: Request, res: Response) => {
106119
}
107120
})
108121

109-
// Ensure response ends when stream ends
122+
// Calculate metrics on stream end
110123
stream.on('end', () => {
111-
memoryCache.set(cacheKey, Buffer.concat(chunks as Uint8Array[]))
124+
const totalResponseTime = Date.now() - startTime // in ms
125+
const charPerMsRatio = totalResponseTime > 0 ? totalChars / totalResponseTime : 0 // chars per ms
126+
127+
statsd.gauge('ai-search.total_response_time', totalResponseTime, diagnosticTags)
128+
statsd.gauge('ai-search.response_chars_per_ms', charPerMsRatio, diagnosticTags)
129+
130+
statsd.increment('ai-search.success_stream_end', 1, diagnosticTags)
112131
res.end()
113132
})
114133
} catch (error) {
134+
statsd.increment('ai-search.route_error', 1, diagnosticTags)
115135
console.error('Error posting /answers to cse-copilot:', error)
116136
res.status(500).json({ errors: [{ message: 'Internal server error' }] })
117137
}

0 commit comments

Comments
 (0)