feat: add audio input support to v2 providers
Add Audio struct alongside Image for sending audio attachments to multimodal LLMs. OpenAI uses input_audio content parts (wav/mp3), Google Gemini uses genai.NewPartFromBytes, and Anthropic skips audio gracefully since it's not supported. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -217,6 +217,39 @@ func (p *Provider) buildRequest(req provider.Request) ([]*genai.Content, *genai.
|
||||
}
|
||||
}
|
||||
|
||||
// Attach each audio payload on the message as an inline byte part.
// An attachment may arrive either as a remote URL or as base64 data;
// any attachment that fails to load is skipped so that one bad audio
// clip does not abort the whole request.
for _, aud := range msg.Audio {
	switch {
	case aud.URL != "":
		resp, err := http.Get(aud.URL)
		if err != nil {
			continue
		}
		data, err := io.ReadAll(resp.Body)
		resp.Body.Close()
		// Reject read failures and non-2xx responses: an error page
		// body is not audio and must not be sent to the model.
		if err != nil || resp.StatusCode < 200 || resp.StatusCode >= 300 {
			continue
		}

		// Prefer the server-reported MIME type, then the caller-supplied
		// content type, and finally fall back to audio/wav.
		mimeType := resp.Header.Get("Content-Type")
		if mimeType == "" {
			mimeType = aud.ContentType
		}
		if mimeType == "" {
			mimeType = "audio/wav"
		}
		parts = append(parts, genai.NewPartFromBytes(data, mimeType))

	case aud.Base64 != "":
		data, err := base64.StdEncoding.DecodeString(aud.Base64)
		if err != nil {
			continue
		}
		// No response header to consult here; trust the caller's
		// content type, defaulting to audio/wav.
		ct := aud.ContentType
		if ct == "" {
			ct = "audio/wav"
		}
		parts = append(parts, genai.NewPartFromBytes(data, ct))
	}
}
|
||||
|
||||
contents = append(contents, genai.NewContentFromParts(parts, role))
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user