Add Audio struct alongside Image for sending audio attachments to multimodal LLMs. OpenAI uses input_audio content parts (wav/mp3), Google Gemini uses genai.NewPartFromBytes, and Anthropic skips audio gracefully since it's not supported. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
276 lines
6.4 KiB
Go
276 lines
6.4 KiB
Go
// Package anthropic implements the go-llm v2 provider interface for Anthropic.
|
|
package anthropic
|
|
|
|
import (
|
|
"context"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-llm/v2/internal/imageutil"
|
|
"gitea.stevedudenhoeffer.com/steve/go-llm/v2/provider"
|
|
|
|
anth "github.com/liushuangls/go-anthropic/v2"
|
|
)
|
|
|
|
// Provider implements the provider.Provider interface for Anthropic.
type Provider struct {
	apiKey string // Anthropic API key; a fresh anth client is built from it per call
}
|
|
|
|
// New creates a new Anthropic provider.
|
|
func New(apiKey string) *Provider {
|
|
return &Provider{apiKey: apiKey}
|
|
}
|
|
|
|
// Complete performs a non-streaming completion.
|
|
func (p *Provider) Complete(ctx context.Context, req provider.Request) (provider.Response, error) {
|
|
cl := anth.NewClient(p.apiKey)
|
|
|
|
anthReq := p.buildRequest(req)
|
|
|
|
resp, err := cl.CreateMessages(ctx, anthReq)
|
|
if err != nil {
|
|
return provider.Response{}, fmt.Errorf("anthropic completion error: %w", err)
|
|
}
|
|
|
|
return p.convertResponse(resp), nil
|
|
}
|
|
|
|
// Stream performs a streaming completion.
|
|
func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan<- provider.StreamEvent) error {
|
|
cl := anth.NewClient(p.apiKey)
|
|
|
|
anthReq := p.buildRequest(req)
|
|
|
|
resp, err := cl.CreateMessagesStream(ctx, anth.MessagesStreamRequest{
|
|
MessagesRequest: anthReq,
|
|
OnContentBlockDelta: func(data anth.MessagesEventContentBlockDeltaData) {
|
|
if data.Delta.Type == "text_delta" && data.Delta.Text != nil {
|
|
events <- provider.StreamEvent{
|
|
Type: provider.StreamEventText,
|
|
Text: *data.Delta.Text,
|
|
}
|
|
}
|
|
},
|
|
})
|
|
|
|
if err != nil {
|
|
return fmt.Errorf("anthropic stream error: %w", err)
|
|
}
|
|
|
|
result := p.convertResponse(resp)
|
|
events <- provider.StreamEvent{
|
|
Type: provider.StreamEventDone,
|
|
Response: &result,
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
|
|
anthReq := anth.MessagesRequest{
|
|
Model: anth.Model(req.Model),
|
|
MaxTokens: 4096,
|
|
}
|
|
|
|
if req.MaxTokens != nil {
|
|
anthReq.MaxTokens = *req.MaxTokens
|
|
}
|
|
|
|
var msgs []anth.Message
|
|
|
|
for _, msg := range req.Messages {
|
|
if msg.Role == "system" {
|
|
if len(anthReq.System) > 0 {
|
|
anthReq.System += "\n"
|
|
}
|
|
anthReq.System += msg.Content
|
|
continue
|
|
}
|
|
|
|
if msg.Role == "tool" {
|
|
// Tool results in Anthropic format - use the helper
|
|
toolUseID := msg.ToolCallID
|
|
content := msg.Content
|
|
isError := false
|
|
msgs = append(msgs, anth.Message{
|
|
Role: anth.RoleUser,
|
|
Content: []anth.MessageContent{
|
|
{
|
|
Type: anth.MessagesContentTypeToolResult,
|
|
MessageContentToolResult: &anth.MessageContentToolResult{
|
|
ToolUseID: &toolUseID,
|
|
Content: []anth.MessageContent{
|
|
{
|
|
Type: anth.MessagesContentTypeText,
|
|
Text: &content,
|
|
},
|
|
},
|
|
IsError: &isError,
|
|
},
|
|
},
|
|
},
|
|
})
|
|
continue
|
|
}
|
|
|
|
role := anth.RoleUser
|
|
if msg.Role == "assistant" {
|
|
role = anth.RoleAssistant
|
|
}
|
|
|
|
m := anth.Message{
|
|
Role: role,
|
|
Content: []anth.MessageContent{},
|
|
}
|
|
|
|
if msg.Content != "" {
|
|
m.Content = append(m.Content, anth.MessageContent{
|
|
Type: anth.MessagesContentTypeText,
|
|
Text: &msg.Content,
|
|
})
|
|
}
|
|
|
|
// Handle tool calls in assistant messages
|
|
for _, tc := range msg.ToolCalls {
|
|
var input json.RawMessage
|
|
if tc.Arguments != "" {
|
|
input = json.RawMessage(tc.Arguments)
|
|
} else {
|
|
input = json.RawMessage("{}")
|
|
}
|
|
m.Content = append(m.Content, anth.MessageContent{
|
|
Type: anth.MessagesContentTypeToolUse,
|
|
MessageContentToolUse: &anth.MessageContentToolUse{
|
|
ID: tc.ID,
|
|
Name: tc.Name,
|
|
Input: input,
|
|
},
|
|
})
|
|
}
|
|
|
|
// Handle images
|
|
for _, img := range msg.Images {
|
|
if role == anth.RoleAssistant {
|
|
role = anth.RoleUser
|
|
m.Role = anth.RoleUser
|
|
}
|
|
|
|
if img.Base64 != "" {
|
|
b64 := img.Base64
|
|
contentType := img.ContentType
|
|
|
|
// Compress if > 5MiB
|
|
raw, err := base64.StdEncoding.DecodeString(b64)
|
|
if err == nil && len(raw) >= 5242880 {
|
|
compressed, mime, cerr := imageutil.CompressImage(b64, 5*1024*1024)
|
|
if cerr == nil {
|
|
b64 = compressed
|
|
contentType = mime
|
|
}
|
|
}
|
|
|
|
m.Content = append(m.Content, anth.NewImageMessageContent(
|
|
anth.NewMessageContentSource(
|
|
anth.MessagesContentSourceTypeBase64,
|
|
contentType,
|
|
b64,
|
|
)))
|
|
} else if img.URL != "" {
|
|
// Download and convert to base64 (Anthropic doesn't support URLs directly)
|
|
resp, err := http.Get(img.URL)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
data, err := io.ReadAll(resp.Body)
|
|
resp.Body.Close()
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
contentType := resp.Header.Get("Content-Type")
|
|
b64 := base64.StdEncoding.EncodeToString(data)
|
|
|
|
m.Content = append(m.Content, anth.NewImageMessageContent(
|
|
anth.NewMessageContentSource(
|
|
anth.MessagesContentSourceTypeBase64,
|
|
contentType,
|
|
b64,
|
|
)))
|
|
}
|
|
}
|
|
|
|
// Audio is not supported by Anthropic — skip silently.
|
|
|
|
// Merge consecutive same-role messages (Anthropic requires alternating)
|
|
if len(msgs) > 0 && msgs[len(msgs)-1].Role == role {
|
|
msgs[len(msgs)-1].Content = append(msgs[len(msgs)-1].Content, m.Content...)
|
|
} else {
|
|
msgs = append(msgs, m)
|
|
}
|
|
}
|
|
|
|
for _, tool := range req.Tools {
|
|
anthReq.Tools = append(anthReq.Tools, anth.ToolDefinition{
|
|
Name: tool.Name,
|
|
Description: tool.Description,
|
|
InputSchema: tool.Schema,
|
|
})
|
|
}
|
|
|
|
anthReq.Messages = msgs
|
|
|
|
if req.Temperature != nil {
|
|
f := float32(*req.Temperature)
|
|
anthReq.Temperature = &f
|
|
}
|
|
|
|
if req.TopP != nil {
|
|
f := float32(*req.TopP)
|
|
anthReq.TopP = &f
|
|
}
|
|
|
|
if len(req.Stop) > 0 {
|
|
anthReq.StopSequences = req.Stop
|
|
}
|
|
|
|
return anthReq
|
|
}
|
|
|
|
func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response {
|
|
var res provider.Response
|
|
var textParts []string
|
|
|
|
for _, block := range resp.Content {
|
|
switch block.Type {
|
|
case anth.MessagesContentTypeText:
|
|
if block.Text != nil {
|
|
textParts = append(textParts, *block.Text)
|
|
}
|
|
case anth.MessagesContentTypeToolUse:
|
|
if block.MessageContentToolUse != nil {
|
|
args, _ := json.Marshal(block.MessageContentToolUse.Input)
|
|
res.ToolCalls = append(res.ToolCalls, provider.ToolCall{
|
|
ID: block.MessageContentToolUse.ID,
|
|
Name: block.MessageContentToolUse.Name,
|
|
Arguments: string(args),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
res.Text = strings.Join(textParts, "")
|
|
|
|
res.Usage = &provider.Usage{
|
|
InputTokens: resp.Usage.InputTokens,
|
|
OutputTokens: resp.Usage.OutputTokens,
|
|
TotalTokens: resp.Usage.InputTokens + resp.Usage.OutputTokens,
|
|
}
|
|
|
|
return res
|
|
}
|