Add Audio struct alongside Image for sending audio attachments to multimodal LLMs. OpenAI uses input_audio content parts (wav/mp3), Google Gemini uses genai.NewPartFromBytes, and Anthropic skips audio gracefully since it's not supported. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
88 lines
2.5 KiB
Go
88 lines
2.5 KiB
Go
package llm
|
|
|
|
// Role identifies who authored a message in a conversation.
type Role string

// Roles a message author can have.
const (
	RoleSystem    Role = "system"    // system prompt
	RoleUser      Role = "user"      // user input
	RoleAssistant Role = "assistant" // assistant output
	RoleTool      Role = "tool"      // result of a tool call
)
|
|
|
|
// Image is an image attachment.
//
// Provide exactly one of URL or Base64 as the source of the image data.
type Image struct {
	URL         string // HTTP(S) URL of the image
	Base64      string // raw base64-encoded image data
	ContentType string // MIME type (e.g., "image/png"); required when Base64 is used
}
|
|
|
|
// Audio is an audio attachment.
//
// Provide exactly one of URL or Base64 as the source of the audio data.
type Audio struct {
	URL         string // HTTP(S) URL of the audio file
	Base64      string // raw base64-encoded audio data
	ContentType string // MIME type (e.g., "audio/wav", "audio/mp3")
}
|
|
|
|
// Content represents message content with optional text, images, and audio.
|
|
type Content struct {
|
|
Text string
|
|
Images []Image
|
|
Audio []Audio
|
|
}
|
|
|
|
// ToolCall describes a tool invocation requested by the assistant.
type ToolCall struct {
	ID        string // identifier of this call; presumably what Message.ToolCallID refers back to (confirm against callers)
	Name      string // name of the tool to invoke
	Arguments string // raw JSON
}
|
|
|
|
// Message represents a single message in a conversation.
|
|
type Message struct {
|
|
Role Role
|
|
Content Content
|
|
|
|
// ToolCallID is set when Role == RoleTool, identifying which tool call this responds to.
|
|
ToolCallID string
|
|
|
|
// ToolCalls is set when the assistant requests tool invocations.
|
|
ToolCalls []ToolCall
|
|
}
|
|
|
|
// UserMessage creates a user message with text content.
|
|
func UserMessage(text string) Message {
|
|
return Message{Role: RoleUser, Content: Content{Text: text}}
|
|
}
|
|
|
|
// UserMessageWithImages creates a user message with text and images.
|
|
func UserMessageWithImages(text string, images ...Image) Message {
|
|
return Message{Role: RoleUser, Content: Content{Text: text, Images: images}}
|
|
}
|
|
|
|
// UserMessageWithAudio creates a user message with text and audio attachments.
|
|
func UserMessageWithAudio(text string, audio ...Audio) Message {
|
|
return Message{Role: RoleUser, Content: Content{Text: text, Audio: audio}}
|
|
}
|
|
|
|
// SystemMessage creates a system prompt message.
|
|
func SystemMessage(text string) Message {
|
|
return Message{Role: RoleSystem, Content: Content{Text: text}}
|
|
}
|
|
|
|
// AssistantMessage creates an assistant message with text content.
|
|
func AssistantMessage(text string) Message {
|
|
return Message{Role: RoleAssistant, Content: Content{Text: text}}
|
|
}
|
|
|
|
// ToolResultMessage creates a tool result message.
|
|
func ToolResultMessage(toolCallID string, result string) Message {
|
|
return Message{
|
|
Role: RoleTool,
|
|
Content: Content{Text: result},
|
|
ToolCallID: toolCallID,
|
|
}
|
|
}
|