proxy,ui-svelte: improve support for v1/messages and v1/responses (#758)
This improves support for activity logging from the v1/responses and v1/messages endpoints: (1) add chat endpoint selection to Playground > Chat > Settings; (2) improve metrics extraction for streaming v1/messages and v1/responses endpoints (tested with llama-server). Fixes #742.
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
<script lang="ts">
|
||||
import { models } from "../../stores/api";
|
||||
import { persistentStore } from "../../stores/persistent";
|
||||
import { streamChatCompletion } from "../../lib/chatApi";
|
||||
import { streamChatCompletion, type Endpoint } from "../../lib/chatApi";
|
||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||
import type { ChatMessage, ContentPart } from "../../lib/types";
|
||||
import ChatMessageComponent from "./ChatMessage.svelte";
|
||||
@@ -11,6 +11,8 @@
|
||||
const selectedModelStore = persistentStore<string>("playground-selected-model", "");
|
||||
const systemPromptStore = persistentStore<string>("playground-system-prompt", "");
|
||||
const temperatureStore = persistentStore<number>("playground-temperature", 0.7);
|
||||
const endpointStore = persistentStore<Endpoint>("playground-endpoint", "v1/chat/completions");
|
||||
const maxTokensStore = persistentStore<number>("playground-max-tokens", 4096);
|
||||
|
||||
function loadMessages(): ChatMessage[] {
|
||||
try {
|
||||
@@ -142,7 +144,7 @@
|
||||
$selectedModelStore,
|
||||
apiMessages,
|
||||
abortController.signal,
|
||||
{ temperature: $temperatureStore }
|
||||
{ temperature: $temperatureStore, endpoint: $endpointStore, max_tokens: $maxTokensStore }
|
||||
);
|
||||
|
||||
for await (const chunk of stream) {
|
||||
@@ -319,6 +321,19 @@
|
||||
<!-- Settings panel -->
|
||||
{#if showSettings}
|
||||
<div class="shrink-0 mb-4 p-4 bg-surface border border-gray-200 dark:border-white/10 rounded">
|
||||
<div class="mb-4">
|
||||
<label class="block text-sm font-medium mb-1" for="endpoint">Endpoint</label>
|
||||
<select
|
||||
id="endpoint"
|
||||
class="w-full px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-card focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$endpointStore}
|
||||
disabled={isStreaming}
|
||||
>
|
||||
<option value="v1/chat/completions">/v1/chat/completions</option>
|
||||
<option value="v1/messages">/v1/messages</option>
|
||||
<option value="v1/responses">/v1/responses</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="mb-4">
|
||||
<label class="block text-sm font-medium mb-1" for="system-prompt">System Prompt</label>
|
||||
<textarea
|
||||
@@ -330,7 +345,7 @@
|
||||
disabled={isStreaming}
|
||||
></textarea>
|
||||
</div>
|
||||
<div>
|
||||
<div class="mb-4">
|
||||
<label class="block text-sm font-medium mb-1" for="temperature">
|
||||
Temperature: {$temperatureStore.toFixed(2)}
|
||||
</label>
|
||||
@@ -349,6 +364,18 @@
|
||||
<span>Creative (2)</span>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<label class="block text-sm font-medium mb-1" for="max-tokens">Max Tokens</label>
|
||||
<input
|
||||
id="max-tokens"
|
||||
type="number"
|
||||
min="1"
|
||||
class="w-full px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-card focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$maxTokensStore}
|
||||
disabled={isStreaming}
|
||||
/>
|
||||
<p class="text-xs text-txtsecondary mt-1">Required for /v1/messages.</p>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
|
||||
+265
-41
@@ -1,4 +1,6 @@
|
||||
import type { ChatMessage, ChatCompletionRequest } from "./types";
|
||||
import type { ChatMessage, ContentPart } from "./types";
|
||||
|
||||
export type Endpoint = "v1/chat/completions" | "v1/messages" | "v1/responses";
|
||||
|
||||
export interface StreamChunk {
|
||||
content: string;
|
||||
@@ -8,9 +10,126 @@ export interface StreamChunk {
|
||||
|
||||
/**
 * Optional per-request settings accepted by `streamChatCompletion`.
 */
export interface ChatOptions {
  /** Sampling temperature forwarded to the backend when defined. */
  temperature?: number;
  /** API endpoint the request is sent to; `streamChatCompletion` uses `v1/chat/completions` when unset. */
  endpoint?: Endpoint;
  /** Upper bound on the number of generated tokens. */
  max_tokens?: number;
}
|
||||
|
||||
function parseSSELine(line: string): StreamChunk | null {
|
||||
/**
 * Splits a base64 `data:` URL into its media type and payload.
 *
 * Optional parameters between the media type and the `;base64` marker
 * (for example `;charset=utf-8`) are accepted and discarded, surrounding
 * whitespace is ignored, and whitespace inside the payload is removed.
 *
 * @param url - A URL of the form `data:<media-type>[;param...];base64,<payload>`.
 * @returns The media type and the base64 payload.
 * @throws Error if `url` is not a base64 data URL.
 */
function parseDataUrl(url: string): { media_type: string; data: string } {
  const match = /^data:([^;,]+)(?:;[^;,]+)*;base64,([\s\S]*)$/i.exec(url.trim());
  if (!match) {
    throw new Error("Image is not a base64 data URL");
  }
  // Destructure with defaults so the result is well-typed under noUncheckedIndexedAccess.
  const [, mediaType = "", payload = ""] = match;
  return { media_type: mediaType, data: payload.replace(/\s+/g, "") };
}
|
||||
|
||||
/**
 * Separates system messages from the rest of a conversation.
 *
 * Text from every `system` message (plain string content, or the `text`
 * parts of multi-part content) is collected in order and joined with a
 * blank line. Non-text parts of system messages are dropped.
 *
 * @param messages - The conversation, in order.
 * @returns `system`: the combined system text (empty string when there is
 *   none); `rest`: all non-system messages in their original order.
 */
function splitSystemMessages(messages: ChatMessage[]): { system: string; rest: ChatMessage[] } {
  const systemTexts: string[] = [];
  const rest: ChatMessage[] = [];

  for (const message of messages) {
    if (message.role !== "system") {
      rest.push(message);
      continue;
    }
    if (typeof message.content === "string") {
      systemTexts.push(message.content);
      continue;
    }
    for (const part of message.content) {
      if (part.type === "text") systemTexts.push(part.text);
    }
  }

  return { system: systemTexts.join("\n\n"), rest };
}
|
||||
|
||||
/**
 * Builds the JSON body for a streaming `/v1/chat/completions` request.
 *
 * Only `role` and `content` of each message are forwarded. `temperature`
 * is set from the options (and disappears during JSON serialization when
 * undefined); `max_tokens` is included only when it is a truthy value.
 *
 * @param model - Model identifier to request.
 * @param messages - Conversation messages, sent as-is (role + content).
 * @param options - Optional sampling settings.
 * @returns The request body object.
 */
function buildChatCompletionsBody(model: string, messages: ChatMessage[], options?: ChatOptions): object {
  const body: Record<string, unknown> = {
    model,
    messages: messages.map(({ role, content }) => ({ role, content })),
    stream: true,
    temperature: options?.temperature,
  };
  if (options?.max_tokens) {
    body.max_tokens = options.max_tokens;
  }
  return body;
}
|
||||
|
||||
/**
 * Builds the JSON body for a streaming `/v1/messages` request.
 *
 * System messages are lifted out of the conversation into the top-level
 * `system` field. Multi-part content is converted into `text` and base64
 * `image` blocks; image parts on assistant messages are skipped.
 *
 * `max_tokens` is always sent. A missing or unusable value (non-finite or
 * below 1) falls back to 4096, and fractional values are rounded down.
 *
 * @param model - Model identifier to request.
 * @param messages - Conversation messages, possibly including system messages.
 * @param options - Optional sampling settings.
 * @returns The request body object.
 * @throws Error if an image part is not a base64 data URL.
 */
function buildMessagesBody(model: string, messages: ChatMessage[], options?: ChatOptions): object {
  const { system, rest } = splitSystemMessages(messages);

  const mapped = rest.map((m) => {
    if (typeof m.content === "string") {
      return { role: m.role, content: m.content };
    }
    const parts: ContentPart[] = m.content;
    const blocks: object[] = [];
    for (const part of parts) {
      if (part.type === "text") {
        blocks.push({ type: "text", text: part.text });
      } else if (m.role !== "assistant") {
        const { media_type, data } = parseDataUrl(part.image_url.url);
        blocks.push({ type: "image", source: { type: "base64", media_type, data } });
      }
    }
    return { role: m.role, content: blocks };
  });

  // Guard against 0, negative, NaN or null values reaching the backend.
  const requested = options?.max_tokens;
  const maxTokens =
    typeof requested === "number" && Number.isFinite(requested) && requested >= 1
      ? Math.floor(requested)
      : 4096;

  const body: Record<string, unknown> = {
    model,
    messages: mapped,
    stream: true,
    max_tokens: maxTokens,
  };
  if (system) body.system = system;
  if (options?.temperature !== undefined) body.temperature = options.temperature;
  return body;
}
|
||||
|
||||
/**
 * Builds the JSON body for a streaming `/v1/responses` request.
 *
 * System messages are lifted out of the conversation into `instructions`.
 * Text is sent as `output_text` for assistant messages and `input_text`
 * for every other role. Image parts become `input_image` entries, except
 * on assistant messages, where they are skipped (matching how the
 * `/v1/messages` body builder treats assistant images).
 *
 * @param model - Model identifier to request.
 * @param messages - Conversation messages, possibly including system messages.
 * @param options - Optional sampling settings; a truthy `max_tokens` is sent
 *   as `max_output_tokens`.
 * @returns The request body object.
 */
function buildResponsesBody(model: string, messages: ChatMessage[], options?: ChatOptions): object {
  const { system, rest } = splitSystemMessages(messages);

  const input = rest.map((m) => {
    const isAssistant = m.role === "assistant";
    const textType = isAssistant ? "output_text" : "input_text";

    if (typeof m.content === "string") {
      return { role: m.role, content: [{ type: textType, text: m.content }] };
    }

    const parts: ContentPart[] = m.content;
    const content: object[] = [];
    for (const part of parts) {
      if (part.type === "text") {
        content.push({ type: textType, text: part.text });
      } else if (!isAssistant) {
        // Image inputs are only emitted for non-assistant turns.
        content.push({ type: "input_image", image_url: part.image_url.url });
      }
    }
    return { role: m.role, content };
  });

  const body: Record<string, unknown> = {
    model,
    input,
    stream: true,
  };
  if (system) body.instructions = system;
  if (options?.temperature !== undefined) body.temperature = options.temperature;
  if (options?.max_tokens) body.max_output_tokens = options.max_tokens;
  return body;
}
|
||||
|
||||
/**
 * Selects the request URL and body format for the chosen endpoint.
 *
 * @param endpoint - Target API endpoint; anything other than `v1/messages`
 *   or `v1/responses` is handled as `v1/chat/completions`.
 * @param model - Model identifier to request.
 * @param messages - Conversation messages.
 * @param options - Optional sampling settings passed to the body builder.
 * @returns The URL (the endpoint prefixed with `/`) and the request body.
 */
function buildRequest(
  endpoint: Endpoint,
  model: string,
  messages: ChatMessage[],
  options?: ChatOptions
): { url: string; body: object } {
  let body: object;
  switch (endpoint) {
    case "v1/messages":
      body = buildMessagesBody(model, messages, options);
      break;
    case "v1/responses":
      body = buildResponsesBody(model, messages, options);
      break;
    case "v1/chat/completions":
    default:
      body = buildChatCompletionsBody(model, messages, options);
      break;
  }
  return { url: "/" + endpoint, body };
}
|
||||
|
||||
function parseChatCompletionsLine(line: string): StreamChunk | null {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed || !trimmed.startsWith("data: ")) {
|
||||
return null;
|
||||
@@ -36,25 +155,158 @@ function parseSSELine(line: string): StreamChunk | null {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses a `/v1/chat/completions` stream line by line.
 *
 * Bytes are decoded incrementally and split on `\n`; the last, possibly
 * incomplete, line is kept in a buffer until more data arrives. Each
 * complete line is handed to `parseChatCompletionsLine`. The generator
 * stops right after yielding a chunk marked `done`. When the stream ends,
 * whatever is left in the buffer is parsed once more and yielded unless it
 * is a `done` chunk.
 *
 * @param reader - Reader over the response body.
 * @returns An async generator of parsed chunks.
 */
async function* parseChatCompletionsStream(
  reader: ReadableStreamDefaultReader<Uint8Array>
): AsyncGenerator<StreamChunk> {
  const decoder = new TextDecoder();
  let pending = "";

  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;

    pending += decoder.decode(value, { stream: true });
    const lines = pending.split("\n");
    pending = lines.pop() || "";

    for (const line of lines) {
      const chunk = parseChatCompletionsLine(line);
      if (!chunk) continue;
      yield chunk;
      if (chunk.done) return;
    }
  }

  // A final line without a trailing newline may still carry content.
  const trailing = parseChatCompletionsLine(pending);
  if (trailing && !trailing.done) {
    yield trailing;
  }
}
|
||||
|
||||
/**
 * Parses one server-sent-event block (the lines of a single event).
 *
 * A trailing `\r` is stripped from each line. Empty lines and comment lines
 * (starting with `:`) are ignored, as is any field other than `event:` and
 * `data:`. Field values are trimmed; multiple `data:` lines are joined
 * with `\n`, and a later `event:` line overrides an earlier one.
 *
 * @param block - The text of one event, without the terminating blank line.
 * @returns The event name (empty string when absent) and joined data, or
 *   `null` when the block has neither an event name nor any data line.
 */
function parseSSEEventBlock(block: string): { event: string; data: string } | null {
  let eventName = "";
  const payloadLines: string[] = [];

  for (const raw of block.split("\n")) {
    const line = raw.endsWith("\r") ? raw.slice(0, -1) : raw;
    if (line === "" || line.startsWith(":")) continue;

    if (line.startsWith("event:")) {
      eventName = line.slice("event:".length).trim();
    } else if (line.startsWith("data:")) {
      payloadLines.push(line.slice("data:".length).trim());
    }
  }

  if (payloadLines.length === 0 && !eventName) return null;
  return { event: eventName, data: payloadLines.join("\n") };
}
|
||||
|
||||
/**
 * Parses a `/v1/messages` server-sent-event stream into `StreamChunk`s.
 *
 * Events are separated by a blank line; both LF and CRLF line endings are
 * accepted. Only two event types are acted on:
 * - `message_stop` yields a final `done` chunk and ends the generator;
 * - `content_block_delta` yields text for a `text_delta` and reasoning
 *   content for a `thinking_delta`.
 * All other events, and events whose data is not valid JSON, are ignored.
 * Data left in the buffer when the stream ends (an event without its
 * terminating blank line) is discarded.
 *
 * @param reader - Reader over the response body.
 * @returns An async generator of parsed chunks.
 */
async function* parseMessagesStream(
  reader: ReadableStreamDefaultReader<Uint8Array>
): AsyncGenerator<StreamChunk> {
  const decoder = new TextDecoder();
  let buffer = "";

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    buffer += decoder.decode(value, { stream: true });
    // Split on a blank line, tolerating "\r\n" as well as "\n" line endings.
    const blocks = buffer.split(/\r?\n\r?\n/);
    buffer = blocks.pop() ?? "";

    for (const block of blocks) {
      const parsed = parseSSEEventBlock(block);
      if (!parsed) continue;

      if (parsed.event === "message_stop") {
        yield { content: "", done: true };
        return;
      }
      if (parsed.event !== "content_block_delta" || !parsed.data) continue;

      try {
        const json = JSON.parse(parsed.data);
        const delta = json.delta;
        if (!delta) continue;
        if (delta.type === "text_delta" && delta.text) {
          yield { content: delta.text, done: false };
        } else if (delta.type === "thinking_delta" && delta.thinking) {
          yield { content: "", reasoning_content: delta.thinking, done: false };
        }
      } catch {
        // Best effort: a malformed event must not abort the whole stream.
      }
    }
  }
}
|
||||
|
||||
/**
 * Parses a `/v1/responses` server-sent-event stream into `StreamChunk`s.
 *
 * Events are separated by a blank line; both LF and CRLF line endings are
 * accepted. Handled events:
 * - `response.completed` yields a final `done` chunk and ends the generator;
 * - `response.output_text.delta` yields its `delta` as content;
 * - `response.reasoning_summary_text.delta` yields its `delta` as
 *   reasoning content.
 * All other events, and events whose data is not valid JSON, are ignored.
 * Data left in the buffer when the stream ends (an event without its
 * terminating blank line) is discarded.
 *
 * @param reader - Reader over the response body.
 * @returns An async generator of parsed chunks.
 */
async function* parseResponsesStream(
  reader: ReadableStreamDefaultReader<Uint8Array>
): AsyncGenerator<StreamChunk> {
  const decoder = new TextDecoder();
  let buffer = "";

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    buffer += decoder.decode(value, { stream: true });
    // Split on a blank line, tolerating "\r\n" as well as "\n" line endings.
    const blocks = buffer.split(/\r?\n\r?\n/);
    buffer = blocks.pop() ?? "";

    for (const block of blocks) {
      const parsed = parseSSEEventBlock(block);
      if (!parsed) continue;

      if (parsed.event === "response.completed") {
        yield { content: "", done: true };
        return;
      }
      if (!parsed.data) continue;

      try {
        const json = JSON.parse(parsed.data);
        if (parsed.event === "response.output_text.delta" && json.delta) {
          yield { content: json.delta, done: false };
        } else if (parsed.event === "response.reasoning_summary_text.delta" && json.delta) {
          yield { content: "", reasoning_content: json.delta, done: false };
        }
      } catch {
        // Best effort: a malformed event must not abort the whole stream.
      }
    }
  }
}
|
||||
|
||||
/**
 * Picks the stream parser that matches the endpoint's wire format.
 *
 * @param endpoint - Endpoint the response came from; anything other than
 *   `v1/messages` or `v1/responses` is parsed as `v1/chat/completions`.
 * @param reader - Reader over the response body.
 * @returns An async generator of parsed chunks.
 */
function parseStream(
  endpoint: Endpoint,
  reader: ReadableStreamDefaultReader<Uint8Array>
): AsyncGenerator<StreamChunk> {
  if (endpoint === "v1/messages") {
    return parseMessagesStream(reader);
  }
  if (endpoint === "v1/responses") {
    return parseResponsesStream(reader);
  }
  return parseChatCompletionsStream(reader);
}
|
||||
|
||||
export async function* streamChatCompletion(
|
||||
model: string,
|
||||
messages: ChatMessage[],
|
||||
signal?: AbortSignal,
|
||||
options?: ChatOptions
|
||||
): AsyncGenerator<StreamChunk> {
|
||||
const request: ChatCompletionRequest = {
|
||||
model,
|
||||
messages,
|
||||
stream: true,
|
||||
temperature: options?.temperature,
|
||||
};
|
||||
const endpoint = options?.endpoint ?? "v1/chat/completions";
|
||||
const { url, body } = buildRequest(endpoint, model, messages, options);
|
||||
|
||||
const response = await fetch("/v1/chat/completions", {
|
||||
const response = await fetch(url, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify(request),
|
||||
body: JSON.stringify(body),
|
||||
signal,
|
||||
});
|
||||
|
||||
@@ -68,39 +320,11 @@ export async function* streamChatCompletion(
|
||||
throw new Error("Response body is not readable");
|
||||
}
|
||||
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = "";
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
|
||||
if (done) {
|
||||
break;
|
||||
}
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
const lines = buffer.split("\n");
|
||||
buffer = lines.pop() || "";
|
||||
|
||||
for (const line of lines) {
|
||||
const result = parseSSELine(line);
|
||||
if (result?.done) {
|
||||
yield result;
|
||||
return;
|
||||
}
|
||||
if (result) {
|
||||
yield result;
|
||||
}
|
||||
}
|
||||
for await (const chunk of parseStream(endpoint, reader)) {
|
||||
yield chunk;
|
||||
if (chunk.done) return;
|
||||
}
|
||||
|
||||
// Process any remaining buffer
|
||||
const result = parseSSELine(buffer);
|
||||
if (result && !result.done) {
|
||||
yield result;
|
||||
}
|
||||
|
||||
yield { content: "", done: true };
|
||||
} finally {
|
||||
reader.releaseLock();
|
||||
|
||||
Reference in New Issue
Block a user