Add Audio struct alongside Image for sending audio attachments to multimodal LLMs. OpenAI uses input_audio content parts (wav/mp3), Google Gemini uses genai.NewPartFromBytes, and Anthropic skips audio gracefully since it's not supported. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
101 lines
2.6 KiB
Go
101 lines
2.6 KiB
Go
// Package provider defines the interface that LLM backend implementations must satisfy.
|
|
package provider
|
|
|
|
import "context"
|
|
|
|
// Message is the provider-level message representation.
|
|
type Message struct {
|
|
Role string
|
|
Content string
|
|
Images []Image
|
|
Audio []Audio
|
|
ToolCalls []ToolCall
|
|
ToolCallID string
|
|
}
|
|
|
|
// Image represents an image attachment at the provider level.
//
// NOTE(review): URL and Base64 look like two alternative ways of supplying
// the same image; nothing in this file enforces that exactly one is set —
// confirm against the provider implementations.
type Image struct {
	URL         string // reference to the image
	Base64      string // image payload as a base64 string, judging by the field name
	ContentType string // NOTE(review): presumably a MIME type such as "image/png" — confirm
}
|
|
|
|
// Audio represents an audio attachment at the provider level.
//
// It has the same three fields as Image. According to the change description
// that introduced it, backend support differs (some backends accept audio
// input, others skip it), so callers should not assume every Provider
// forwards these attachments — verify against the specific implementation.
type Audio struct {
	URL         string // reference to the audio clip
	Base64      string // audio payload as a base64 string, judging by the field name
	ContentType string // NOTE(review): presumably a MIME type such as "audio/wav" — confirm
}
|
|
|
|
// ToolCall represents a tool invocation requested by the model.
type ToolCall struct {
	ID        string // identifier of this invocation
	Name      string // name of the tool being invoked
	Arguments string // raw JSON; kept as an undecoded string in this type
}
|
|
|
|
// ToolDef defines a tool available to the model.
type ToolDef struct {
	Name        string         // name of the tool
	Description string         // description of the tool
	Schema      map[string]any // JSON Schema; NOTE(review): presumably describes the tool's arguments — confirm
}
|
|
|
|
// Request is a completion request at the provider level.
|
|
type Request struct {
|
|
Model string
|
|
Messages []Message
|
|
Tools []ToolDef
|
|
Temperature *float64
|
|
MaxTokens *int
|
|
TopP *float64
|
|
Stop []string
|
|
}
|
|
|
|
// Response is a completion response at the provider level.
|
|
type Response struct {
|
|
Text string
|
|
ToolCalls []ToolCall
|
|
Usage *Usage
|
|
}
|
|
|
|
// Usage captures token consumption.
//
// The three counters are stored independently; nothing in this type derives
// TotalTokens from the other two.
type Usage struct {
	InputTokens  int // tokens counted on the input side
	OutputTokens int // tokens counted on the output side
	TotalTokens  int // NOTE(review): presumably InputTokens + OutputTokens as reported by the backend — confirm
}
|
|
|
|
// StreamEventType identifies the kind of stream event.
type StreamEventType int

// Stream event kinds. The values are assigned by iota starting at 0, so the
// declaration order below is significant: reordering or inserting entries
// changes the numeric value of every later constant.
const (
	StreamEventText      StreamEventType = iota // Text content delta
	StreamEventToolStart                        // Tool call begins
	StreamEventToolDelta                        // Tool call argument delta
	StreamEventToolEnd                          // Tool call complete
	StreamEventDone                             // Stream complete
	StreamEventError                            // Error occurred
)
|
|
|
|
// StreamEvent represents a single event in a streaming response.
|
|
type StreamEvent struct {
|
|
Type StreamEventType
|
|
Text string
|
|
ToolCall *ToolCall
|
|
ToolIndex int
|
|
Error error
|
|
Response *Response
|
|
}
|
|
|
|
// Provider is the interface that LLM backends implement.
|
|
type Provider interface {
|
|
// Complete performs a non-streaming completion.
|
|
Complete(ctx context.Context, req Request) (Response, error)
|
|
|
|
// Stream performs a streaming completion, sending events to the channel.
|
|
// The provider MUST close the channel when done.
|
|
// The provider MUST send exactly one StreamEventDone as the last non-error event.
|
|
Stream(ctx context.Context, req Request, events chan<- StreamEvent) error
|
|
}
|