proxy,ui-svelte: improve support for v1/messages and v1/responses (#758)

This improves support for activity logging from the v1/responses and v1/messages endpoints.

- Add chat endpoint selection to Playground > Chat > Settings.
- Improve metrics extraction for the streaming v1/messages and v1/responses endpoints (tested with llama-server).

Fixes #742
2026-05-14 21:53:57 -07:00
parent aac7b8745a
commit fe71e8a6ea
4 changed files with 537 additions and 119 deletions
@@ -1,7 +1,7 @@
 <script lang="ts">
  import { models } from "../../stores/api";
  import { persistentStore } from "../../stores/persistent";
-  import { streamChatCompletion } from "../../lib/chatApi";
+  import { streamChatCompletion, type Endpoint } from "../../lib/chatApi";
  import { playgroundStores } from "../../stores/playgroundActivity";
  import type { ChatMessage, ContentPart } from "../../lib/types";
  import ChatMessageComponent from "./ChatMessage.svelte";
@@ -11,6 +11,8 @@
  const selectedModelStore = persistentStore<string>("playground-selected-model", "");
  const systemPromptStore = persistentStore<string>("playground-system-prompt", "");
  const temperatureStore = persistentStore<number>("playground-temperature", 0.7);
+  const endpointStore = persistentStore<Endpoint>("playground-endpoint", "v1/chat/completions");
+  const maxTokensStore = persistentStore<number>("playground-max-tokens", 4096);

  function loadMessages(): ChatMessage[] {
    try {
@@ -142,7 +144,7 @@
        $selectedModelStore,
        apiMessages,
        abortController.signal,
-        { temperature: $temperatureStore }
+        { temperature: $temperatureStore, endpoint: $endpointStore, max_tokens: $maxTokensStore }
      );

      for await (const chunk of stream) {
@@ -319,6 +321,19 @@
  <!-- Settings panel -->
  {#if showSettings}
    <div class="shrink-0 mb-4 p-4 bg-surface border border-gray-200 dark:border-white/10 rounded">
+      <div class="mb-4">
+        <label class="block text-sm font-medium mb-1" for="endpoint">Endpoint</label>
+        <select
+          id="endpoint"
+          class="w-full px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-card focus:outline-none focus:ring-2 focus:ring-primary"
+          bind:value={$endpointStore}
+          disabled={isStreaming}
+        >
+          <option value="v1/chat/completions">/v1/chat/completions</option>
+          <option value="v1/messages">/v1/messages</option>
+          <option value="v1/responses">/v1/responses</option>
+        </select>
+      </div>
      <div class="mb-4">
        <label class="block text-sm font-medium mb-1" for="system-prompt">System Prompt</label>
        <textarea
@@ -330,7 +345,7 @@
          disabled={isStreaming}
        ></textarea>
      </div>
-      <div>
+      <div class="mb-4">
        <label class="block text-sm font-medium mb-1" for="temperature">
          Temperature: {$temperatureStore.toFixed(2)}
        </label>
@@ -349,6 +364,18 @@
          <span>Creative (2)</span>
        </div>
      </div>
+      <div>
+        <label class="block text-sm font-medium mb-1" for="max-tokens">Max Tokens</label>
+        <input
+          id="max-tokens"
+          type="number"
+          min="1"
+          class="w-full px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-card focus:outline-none focus:ring-2 focus:ring-primary"
+          bind:value={$maxTokensStore}
+          disabled={isStreaming}
+        />
+        <p class="text-xs text-txtsecondary mt-1">Required for /v1/messages.</p>
+      </div>
    </div>
  {/if}

@@ -1,4 +1,6 @@
-import type { ChatMessage, ChatCompletionRequest } from "./types";
+import type { ChatMessage, ContentPart } from "./types";
+
+/** API paths, written without the leading slash, that the chat client can send requests to. */
+export type Endpoint = "v1/chat/completions" | "v1/messages" | "v1/responses";

 export interface StreamChunk {
  content: string;
@@ -8,9 +10,126 @@ export interface StreamChunk {

 export interface ChatOptions {
+  /** Sampling temperature forwarded in the request body. */
  temperature?: number;
+  /** API to call; `streamChatCompletion` falls back to "v1/chat/completions" when omitted. */
+  endpoint?: Endpoint;
+  /**
+   * Output-token cap. Sent as `max_tokens` for chat completions and messages
+   * (the messages body uses 4096 when this is unset) and as `max_output_tokens`
+   * for responses.
+   */
+  max_tokens?: number;
 }

-function parseSSELine(line: string): StreamChunk | null {
+/**
+ * Splits a base64 `data:` URL into its media type and base64 payload.
+ *
+ * Media-type parameters that sit between the type and the `;base64` marker
+ * (for example `data:image/png;charset=utf-8;base64,...`) are accepted and
+ * dropped, because only the bare media type is returned.
+ *
+ * @param url - The URL to parse; must be a base64-encoded data URL.
+ * @returns The media type and the payload exactly as they appear in the URL.
+ * @throws Error if `url` is not a base64 data URL.
+ */
+function parseDataUrl(url: string): { media_type: string; data: string } {
+  // `[\s\S]*` instead of `.*` so a payload containing line breaks is captured whole.
+  const match = /^data:([^;,]+)(?:;[^;,]+)*?;base64,([\s\S]*)$/i.exec(url);
+  if (!match) {
+    throw new Error("Image is not a base64 data URL");
+  }
+  return { media_type: match[1], data: match[2] };
+}
+
+/**
+ * Separates system messages from the rest of a conversation.
+ *
+ * The text of every system message (string content, or the text parts of
+ * multi-part content) is collected in order and joined with a blank line.
+ * Non-text parts of system messages are discarded.
+ *
+ * @param messages - The conversation, in order.
+ * @returns The combined system text and the non-system messages in their original order.
+ */
+function splitSystemMessages(messages: ChatMessage[]): { system: string; rest: ChatMessage[] } {
+  const rest = messages.filter((msg) => msg.role !== "system");
+  const systemParts = messages
+    .filter((msg) => msg.role === "system")
+    .flatMap((msg) =>
+      typeof msg.content === "string"
+        ? [msg.content]
+        : msg.content.flatMap((part) => (part.type === "text" ? [part.text] : []))
+    );
+  return { system: systemParts.join("\n\n"), rest };
+}
+
+/**
+ * Builds the JSON body for a streaming /v1/chat/completions request.
+ *
+ * Messages are reduced to their `role` and `content` fields. `temperature` is
+ * always present as a key (possibly undefined); `max_tokens` is added only
+ * when the option holds a truthy value.
+ *
+ * @param model - Model identifier to request.
+ * @param messages - Conversation to send.
+ * @param options - Optional sampling settings.
+ * @returns The request body object.
+ */
+function buildChatCompletionsBody(model: string, messages: ChatMessage[], options?: ChatOptions): object {
+  const body: Record<string, unknown> = {
+    model,
+    messages: messages.map(({ role, content }) => ({ role, content })),
+    stream: true,
+    temperature: options?.temperature,
+  };
+  // Unset and 0 are both treated as "no limit requested".
+  if (options?.max_tokens) {
+    body.max_tokens = options.max_tokens;
+  }
+  return body;
+}
+
+/**
+ * Builds the JSON body for a streaming /v1/messages request.
+ *
+ * System messages are lifted into the top-level `system` field. Multi-part
+ * content becomes a list of `text` and base64 `image` blocks; image parts on
+ * assistant messages are dropped. `max_tokens` falls back to 4096 when the
+ * option is not set.
+ *
+ * @param model - Model identifier to request.
+ * @param messages - Conversation to send.
+ * @param options - Optional sampling settings.
+ * @returns The request body object.
+ * @throws Error if a non-assistant image part is not a base64 data URL.
+ */
+function buildMessagesBody(model: string, messages: ChatMessage[], options?: ChatOptions): object {
+  const { system, rest } = splitSystemMessages(messages);
+
+  const mapped = rest.map(({ role, content }) => {
+    if (typeof content === "string") {
+      return { role, content };
+    }
+    const blocks = content.flatMap((part): object[] => {
+      if (part.type === "text") {
+        return [{ type: "text", text: part.text }];
+      }
+      if (role === "assistant") {
+        return [];
+      }
+      const { media_type, data } = parseDataUrl(part.image_url.url);
+      return [{ type: "image", source: { type: "base64", media_type, data } }];
+    });
+    return { role, content: blocks };
+  });
+
+  return {
+    model,
+    messages: mapped,
+    stream: true,
+    max_tokens: options?.max_tokens ?? 4096,
+    ...(system ? { system } : {}),
+    ...(options?.temperature !== undefined ? { temperature: options.temperature } : {}),
+  };
+}
+
+/**
+ * Builds the JSON body for a streaming /v1/responses request.
+ *
+ * System messages are lifted into `instructions`. Text is sent as
+ * `output_text` for assistant messages and `input_text` for everything else.
+ * Image parts are sent as `input_image` on non-assistant messages and dropped
+ * from assistant messages, matching how the /v1/messages body treats them:
+ * `input_image` is an input-only content type.
+ *
+ * @param model - Model identifier to request.
+ * @param messages - Conversation to send.
+ * @param options - Optional sampling settings; `max_tokens` is sent as
+ *   `max_output_tokens` when truthy.
+ * @returns The request body object.
+ */
+function buildResponsesBody(model: string, messages: ChatMessage[], options?: ChatOptions): object {
+  const { system, rest } = splitSystemMessages(messages);
+  const input = rest.map((m) => {
+    const isAssistant = m.role === "assistant";
+    const textType = isAssistant ? "output_text" : "input_text";
+    if (typeof m.content === "string") {
+      return { role: m.role, content: [{ type: textType, text: m.content }] };
+    }
+    const content: object[] = [];
+    for (const part of m.content) {
+      if (part.type === "text") {
+        content.push({ type: textType, text: part.text });
+      } else if (!isAssistant) {
+        // Never attach an input-only image part to an assistant turn.
+        content.push({ type: "input_image", image_url: part.image_url.url });
+      }
+    }
+    return { role: m.role, content };
+  });
+
+  const body: Record<string, unknown> = {
+    model,
+    input,
+    stream: true,
+  };
+  if (system) body.instructions = system;
+  if (options?.temperature !== undefined) body.temperature = options.temperature;
+  if (options?.max_tokens) body.max_output_tokens = options.max_tokens;
+  return body;
+}
+
+/**
+ * Produces the URL and JSON body for a request to the chosen endpoint.
+ *
+ * The URL is the endpoint with a leading slash. Any value other than
+ * "v1/messages" or "v1/responses" gets the chat-completions body.
+ *
+ * @param endpoint - Target API path.
+ * @param model - Model identifier to request.
+ * @param messages - Conversation to send.
+ * @param options - Optional sampling settings passed through to the body builder.
+ * @returns The request URL and body.
+ */
+function buildRequest(
+  endpoint: Endpoint,
+  model: string,
+  messages: ChatMessage[],
+  options?: ChatOptions
+): { url: string; body: object } {
+  let body: object;
+  if (endpoint === "v1/messages") {
+    body = buildMessagesBody(model, messages, options);
+  } else if (endpoint === "v1/responses") {
+    body = buildResponsesBody(model, messages, options);
+  } else {
+    body = buildChatCompletionsBody(model, messages, options);
+  }
+  return { url: "/" + endpoint, body };
+}
+
+function parseChatCompletionsLine(line: string): StreamChunk | null {
  const trimmed = line.trim();
  if (!trimmed || !trimmed.startsWith("data: ")) {
    return null;
@@ -36,25 +155,158 @@ function parseSSELine(line: string): StreamChunk | null {
  }
 }

+/**
+ * Turns a chat-completions response body into a sequence of stream chunks.
+ *
+ * Bytes are decoded incrementally and split on newlines; each complete line
+ * goes through `parseChatCompletionsLine`. The generator stops right after
+ * yielding a chunk marked `done`. When the stream ends, whatever is left in
+ * the buffer is parsed as one last line and yielded unless it is a `done` chunk.
+ *
+ * @param reader - Reader over the response body.
+ */
+async function* parseChatCompletionsStream(
+  reader: ReadableStreamDefaultReader<Uint8Array>
+): AsyncGenerator<StreamChunk> {
+  const decoder = new TextDecoder();
+  let pending = "";
+
+  for (let read = await reader.read(); !read.done; read = await reader.read()) {
+    const lines = (pending + decoder.decode(read.value, { stream: true })).split("\n");
+    // The last element is an incomplete line (or empty); keep it for the next read.
+    pending = lines.pop() || "";
+
+    for (const line of lines) {
+      const chunk = parseChatCompletionsLine(line);
+      if (!chunk) continue;
+      yield chunk;
+      if (chunk.done) return;
+    }
+  }
+
+  const tail = parseChatCompletionsLine(pending);
+  if (tail && !tail.done) {
+    yield tail;
+  }
+}
+
+/**
+ * Extracts the `event` name and `data` payload from one server-sent-event block.
+ *
+ * A trailing carriage return is removed from each line. Only `event:` and
+ * `data:` lines are read, and their values are trimmed; blank lines, comment
+ * lines and other fields are ignored. Multiple `data:` lines are joined with
+ * newlines, and the last `event:` line wins.
+ *
+ * @param block - The text of a single event, without its terminating blank line.
+ * @returns The event name and data, or null when the block has neither.
+ */
+function parseSSEEventBlock(block: string): { event: string; data: string } | null {
+  const EVENT_PREFIX = "event:";
+  const DATA_PREFIX = "data:";
+
+  let event = "";
+  const payload: string[] = [];
+  const lines = block.split("\n").map((raw) => raw.replace(/\r$/, ""));
+  for (const line of lines) {
+    // Empty lines and ":" comments match neither prefix and fall through untouched.
+    if (line.startsWith(EVENT_PREFIX)) {
+      event = line.substring(EVENT_PREFIX.length).trim();
+    } else if (line.startsWith(DATA_PREFIX)) {
+      payload.push(line.substring(DATA_PREFIX.length).trim());
+    }
+  }
+
+  return payload.length > 0 || event ? { event, data: payload.join("\n") } : null;
+}
+
+/**
+ * Converts one /v1/messages event block into a stream chunk.
+ *
+ * Returns a `done` chunk for `message_stop`, a content or reasoning chunk for
+ * a `content_block_delta` carrying a non-empty `text_delta` or
+ * `thinking_delta`, and null for anything else, including malformed JSON.
+ */
+function parseMessagesEvent(block: string): StreamChunk | null {
+  const parsed = parseSSEEventBlock(block);
+  if (!parsed) return null;
+  if (parsed.event === "message_stop") {
+    return { content: "", done: true };
+  }
+  if (parsed.event !== "content_block_delta" || !parsed.data) return null;
+  try {
+    const delta = JSON.parse(parsed.data).delta;
+    if (!delta) return null;
+    if (delta.type === "text_delta" && delta.text) {
+      return { content: delta.text, done: false };
+    }
+    if (delta.type === "thinking_delta" && delta.thinking) {
+      return { content: "", reasoning_content: delta.thinking, done: false };
+    }
+  } catch {
+    // ignore malformed event
+  }
+  return null;
+}
+
+/**
+ * Turns a /v1/messages response body into a sequence of stream chunks.
+ *
+ * Events are separated by a blank line; both LF and CRLF line endings are
+ * recognised. The generator stops right after yielding the `done` chunk for
+ * `message_stop`. If the stream ends with an event that has no terminating
+ * blank line, that event is still processed.
+ *
+ * @param reader - Reader over the response body.
+ */
+async function* parseMessagesStream(
+  reader: ReadableStreamDefaultReader<Uint8Array>
+): AsyncGenerator<StreamChunk> {
+  const decoder = new TextDecoder();
+  // A blank line ends an event; tolerate CRLF so such streams are not buffered forever.
+  const blockSeparator = /\r?\n\r?\n/;
+  let buffer = "";
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+
+    buffer += decoder.decode(value, { stream: true });
+    const blocks = buffer.split(blockSeparator);
+    buffer = blocks.pop() || "";
+
+    for (const block of blocks) {
+      const chunk = parseMessagesEvent(block);
+      if (!chunk) continue;
+      yield chunk;
+      if (chunk.done) return;
+    }
+  }
+
+  // Flush the decoder and handle a final event that was not followed by a blank line.
+  buffer += decoder.decode();
+  const last = parseMessagesEvent(buffer);
+  if (last) {
+    yield last;
+  }
+}
+
+/**
+ * Converts one /v1/responses event block into a stream chunk.
+ *
+ * Returns a `done` chunk for `response.completed`, a content chunk for
+ * `response.output_text.delta`, a reasoning chunk for
+ * `response.reasoning_summary_text.delta` (both only when `delta` is
+ * non-empty), and null for anything else, including malformed JSON.
+ */
+function parseResponsesEvent(block: string): StreamChunk | null {
+  const parsed = parseSSEEventBlock(block);
+  if (!parsed) return null;
+  if (parsed.event === "response.completed") {
+    return { content: "", done: true };
+  }
+  if (!parsed.data) return null;
+  try {
+    const delta = JSON.parse(parsed.data)?.delta;
+    if (!delta) return null;
+    if (parsed.event === "response.output_text.delta") {
+      return { content: delta, done: false };
+    }
+    if (parsed.event === "response.reasoning_summary_text.delta") {
+      return { content: "", reasoning_content: delta, done: false };
+    }
+  } catch {
+    // ignore malformed event
+  }
+  return null;
+}
+
+/**
+ * Turns a /v1/responses response body into a sequence of stream chunks.
+ *
+ * Events are separated by a blank line; both LF and CRLF line endings are
+ * recognised. The generator stops right after yielding the `done` chunk for
+ * `response.completed`. If the stream ends with an event that has no
+ * terminating blank line, that event is still processed.
+ *
+ * @param reader - Reader over the response body.
+ */
+async function* parseResponsesStream(
+  reader: ReadableStreamDefaultReader<Uint8Array>
+): AsyncGenerator<StreamChunk> {
+  const decoder = new TextDecoder();
+  // A blank line ends an event; tolerate CRLF so such streams are not buffered forever.
+  const blockSeparator = /\r?\n\r?\n/;
+  let buffer = "";
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+
+    buffer += decoder.decode(value, { stream: true });
+    const blocks = buffer.split(blockSeparator);
+    buffer = blocks.pop() || "";
+
+    for (const block of blocks) {
+      const chunk = parseResponsesEvent(block);
+      if (!chunk) continue;
+      yield chunk;
+      if (chunk.done) return;
+    }
+  }
+
+  // Flush the decoder and handle a final event that was not followed by a blank line.
+  buffer += decoder.decode();
+  const last = parseResponsesEvent(buffer);
+  if (last) {
+    yield last;
+  }
+}
+
+/**
+ * Selects the stream parser that matches the endpoint the request was sent to.
+ *
+ * Any value other than "v1/messages" or "v1/responses" is handled by the
+ * chat-completions parser.
+ *
+ * @param endpoint - API path the response came from.
+ * @param reader - Reader over the response body.
+ * @returns An async generator of stream chunks.
+ */
+function parseStream(
+  endpoint: Endpoint,
+  reader: ReadableStreamDefaultReader<Uint8Array>
+): AsyncGenerator<StreamChunk> {
+  if (endpoint === "v1/messages") {
+    return parseMessagesStream(reader);
+  }
+  if (endpoint === "v1/responses") {
+    return parseResponsesStream(reader);
+  }
+  return parseChatCompletionsStream(reader);
+}
+
 export async function* streamChatCompletion(
  model: string,
  messages: ChatMessage[],
  signal?: AbortSignal,
  options?: ChatOptions
 ): AsyncGenerator<StreamChunk> {
-  const request: ChatCompletionRequest = {
-    model,
-    messages,
-    stream: true,
-    temperature: options?.temperature,
-  };
+  const endpoint = options?.endpoint ?? "v1/chat/completions";
+  const { url, body } = buildRequest(endpoint, model, messages, options);

-  const response = await fetch("/v1/chat/completions", {
+  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
-    body: JSON.stringify(request),
+    body: JSON.stringify(body),
    signal,
  });

@@ -68,39 +320,11 @@ export async function* streamChatCompletion(
    throw new Error("Response body is not readable");
  }

-  const decoder = new TextDecoder();
-  let buffer = "";
-
  try {
-    while (true) {
-      const { done, value } = await reader.read();
-
-      if (done) {
-        break;
-      }
-
-      buffer += decoder.decode(value, { stream: true });
-      const lines = buffer.split("\n");
-      buffer = lines.pop() || "";
-
-      for (const line of lines) {
-        const result = parseSSELine(line);
-        if (result?.done) {
-          yield result;
-          return;
-        }
-        if (result) {
-          yield result;
-        }
-      }
+    for await (const chunk of parseStream(endpoint, reader)) {
+      yield chunk;
+      if (chunk.done) return;
    }
-
-    // Process any remaining buffer
-    const result = parseSSELine(buffer);
-    if (result && !result.done) {
-      yield result;
-    }
-
    yield { content: "", done: true };
  } finally {
    reader.releaseLock();