feat: add audio input support to v2 providers

Add an Audio struct alongside Image for sending audio attachments to multimodal LLMs. The OpenAI provider sends audio as input_audio content parts (wav/mp3), the Google Gemini provider uses genai.NewPartFromBytes, and the Anthropic provider gracefully skips audio because it does not support audio input.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 21:00:56 -05:00
parent fc2218b5fe
commit 7e1705c385
6 changed files with 137 additions and 1 deletion
--- a/v2/llm.go
+++ b/v2/llm.go
@@ -140,6 +140,13 @@ func convertMessages(msgs []Message) []provider.Message {
 				ContentType: img.ContentType,
 			})
 		}
+		for _, aud := range m.Content.Audio {
+			pm.Audio = append(pm.Audio, provider.Audio{
+				URL:         aud.URL,
+				Base64:      aud.Base64,
+				ContentType: aud.ContentType,
+			})
+		}
 		for _, tc := range m.ToolCalls {
 			pm.ToolCalls = append(pm.ToolCalls, provider.ToolCall{
 				ID:        tc.ID,