proxy: support timings for /infill from llama-server (#510)

fixes: #463
This commit is contained in:
Benson Wong
2026-02-07 17:16:27 -08:00
committed by GitHub
parent b5fde8eb6d
commit 8d6d949ec3
2 changed files with 77 additions and 1 deletion
+8 -1
View File
@@ -240,7 +240,6 @@ func (mp *metricsMonitor) wrapHandler(
return nil
}
}
if strings.Contains(recorder.Header().Get("Content-Type"), "text/event-stream") {
if parsed, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil {
mp.logger.Warnf("error processing streaming response: %v, path=%s, recording minimal metrics", err, request.URL.Path)
@@ -253,6 +252,14 @@ func (mp *metricsMonitor) wrapHandler(
usage := parsed.Get("usage")
timings := parsed.Get("timings")
// extract timings for infill - response is an array, timings are in the last element
// see #463
if strings.HasPrefix(request.URL.Path, "/infill") {
if arr := parsed.Array(); len(arr) > 0 {
timings = arr[len(arr)-1].Get("timings")
}
}
if usage.Exists() || timings.Exists() {
if parsedMetrics, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
mp.logger.Warnf("error parsing metrics: %v, path=%s, recording minimal metrics", err, request.URL.Path)