Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4b4ee70154 |
@@ -240,7 +240,6 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if strings.Contains(recorder.Header().Get("Content-Type"), "text/event-stream") {
|
if strings.Contains(recorder.Header().Get("Content-Type"), "text/event-stream") {
|
||||||
if parsed, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil {
|
if parsed, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil {
|
||||||
mp.logger.Warnf("error processing streaming response: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
mp.logger.Warnf("error processing streaming response: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||||
@@ -253,6 +252,14 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
usage := parsed.Get("usage")
|
usage := parsed.Get("usage")
|
||||||
timings := parsed.Get("timings")
|
timings := parsed.Get("timings")
|
||||||
|
|
||||||
|
// extract timings for infill - response is an array, timings are in the last element
|
||||||
|
// see #463
|
||||||
|
if strings.HasPrefix(request.URL.Path, "/infill") {
|
||||||
|
if arr := parsed.Array(); len(arr) > 0 {
|
||||||
|
timings = arr[len(arr)-1].Get("timings")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if usage.Exists() || timings.Exists() {
|
if usage.Exists() || timings.Exists() {
|
||||||
if parsedMetrics, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
|
if parsedMetrics, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
|
||||||
mp.logger.Warnf("error parsing metrics: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
mp.logger.Warnf("error parsing metrics: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||||
|
|||||||
@@ -384,6 +384,75 @@ data: [DONE]
|
|||||||
assert.Equal(t, 0, metrics[0].InputTokens)
|
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||||
assert.Equal(t, 0, metrics[0].OutputTokens)
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("infill request extracts timings from last array element", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
|
// Infill response is an array with timings in the last element
|
||||||
|
responseBody := `[
|
||||||
|
{"content": "first chunk"},
|
||||||
|
{"content": "second chunk"},
|
||||||
|
{"content": "final", "timings": {
|
||||||
|
"prompt_n": 150,
|
||||||
|
"predicted_n": 75,
|
||||||
|
"prompt_per_second": 200.5,
|
||||||
|
"predicted_per_second": 35.5,
|
||||||
|
"prompt_ms": 600.0,
|
||||||
|
"predicted_ms": 1800.0,
|
||||||
|
"cache_n": 30
|
||||||
|
}}
|
||||||
|
]`
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(responseBody))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/infill", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 150, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 75, metrics[0].OutputTokens)
|
||||||
|
assert.Equal(t, 30, metrics[0].CachedTokens)
|
||||||
|
assert.Equal(t, 200.5, metrics[0].PromptPerSecond)
|
||||||
|
assert.Equal(t, 35.5, metrics[0].TokensPerSecond)
|
||||||
|
assert.Equal(t, 2400, metrics[0].DurationMs) // 600 + 1800
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("infill request with empty array records minimal metrics", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
|
responseBody := `[]`
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(responseBody))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/infill", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) {
|
func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) {
|
||||||
|
|||||||
Reference in New Issue
Block a user