P2: run kernel + run.Ports inversion — executus is runnable #2
+4
-1
@@ -34,6 +34,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -574,7 +575,9 @@ func (h *Helper) recordLedger(ctx context.Context, call MetaCall) {
|
|||||||
if h.storage == nil {
|
if h.storage == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
_ = h.storage.RecordMetaCall(ctx, call)
|
if err := h.storage.RecordMetaCall(ctx, call); err != nil {
|
||||||
|
slog.Warn("llmmeta: failed to record ledger row", "err", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// tryParseJSON attempts to decode text as JSON. Returns the parsed
|
// tryParseJSON attempts to decode text as JSON. Returns the parsed
|
||||||
|
|||||||
+1
-1
@@ -237,7 +237,7 @@ func recordUsage(ctx context.Context, resp *llm.Response) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
u := resp.Usage
|
u := resp.Usage
|
||||||
if u.InputTokens == 0 && u.OutputTokens == 0 {
|
if u.InputTokens == 0 && u.OutputTokens == 0 && u.CacheReadTokens == 0 && u.CacheWriteTokens == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
model := resolvedModelName(ctx, resp)
|
model := resolvedModelName(ctx, resp)
|
||||||
|
|||||||
+7
-2
@@ -314,7 +314,7 @@ func (c *CloudOllamaLimitCache) fetchTags(ctx context.Context) ([]string, error)
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
body, err := io.ReadAll(resp.Body)
|
body, err := io.ReadAll(io.LimitReader(resp.Body, maxLimitCacheResponseBytes))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -367,7 +367,7 @@ func (c *CloudOllamaLimitCache) fetchContextLength(ctx context.Context, modelNam
|
|||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
respBody, err := io.ReadAll(resp.Body)
|
respBody, err := io.ReadAll(io.LimitReader(resp.Body, maxLimitCacheResponseBytes))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
@@ -451,3 +451,8 @@ func truncate(b []byte, n int) string {
|
|||||||
}
|
}
|
||||||
return string(b[:n]) + "...(truncated)"
|
return string(b[:n]) + "...(truncated)"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// maxLimitCacheResponseBytes bounds the ollama.com limit-cache HTTP responses
|
||||||
|
// (/api/tags, /api/show) so a misbehaving endpoint can't stream an unbounded
|
||||||
|
// body before the 15s timeout fires. 1 MiB is far above any real response.
|
||||||
|
const maxLimitCacheResponseBytes = 1 << 20
|
||||||
|
|||||||
Reference in New Issue
Block a user