feat: add audio input support to v2 providers
All checks were successful
CI / Lint (push) Successful in 9m37s
CI / Root Module (push) Successful in 10m53s
CI / V2 Module (push) Successful in 11m9s

Add Audio struct alongside Image for sending audio attachments to
multimodal LLMs. OpenAI uses input_audio content parts (wav/mp3),
Google Gemini uses genai.NewPartFromBytes, and Anthropic skips
audio gracefully since it's not supported.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 21:00:56 -05:00
parent fc2218b5fe
commit 7e1705c385
6 changed files with 137 additions and 1 deletion

View File

@@ -217,6 +217,39 @@ func (p *Provider) buildRequest(req provider.Request) ([]*genai.Content, *genai.
}
}
// Attach each audio input as an inline genai part. Audio may arrive either
// as a fetchable URL or as a base64 payload; anything malformed or
// unreachable is skipped best-effort (mirrors the image handling above)
// rather than failing the whole request.
for _, aud := range msg.Audio {
	if aud.URL != "" {
		resp, err := http.Get(aud.URL)
		if err != nil {
			continue // best-effort: skip unreachable audio URLs
		}
		// Reject non-2xx responses so an HTML error page is not
		// forwarded to the model as audio bytes.
		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
			resp.Body.Close()
			continue
		}
		data, err := io.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			continue
		}
		// MIME type preference: server-reported, then caller-supplied,
		// then a wav default.
		mimeType := resp.Header.Get("Content-Type")
		if mimeType == "" {
			mimeType = aud.ContentType
		}
		if mimeType == "" {
			mimeType = "audio/wav"
		}
		parts = append(parts, genai.NewPartFromBytes(data, mimeType))
	} else if aud.Base64 != "" {
		data, err := base64.StdEncoding.DecodeString(aud.Base64)
		if err != nil {
			continue // skip malformed base64 payloads
		}
		ct := aud.ContentType
		if ct == "" {
			ct = "audio/wav"
		}
		parts = append(parts, genai.NewPartFromBytes(data, ct))
	}
}
contents = append(contents, genai.NewContentFromParts(parts, role))
}