// Command multimodal demonstrates capability-aware image input: attach an // image without knowing the target's limits — majordomo sniffs the real // format, downscales, re-encodes, and enforces counts/sizes against the // actual serving target before the request goes out. package main import ( "context" "flag" "fmt" "log" "os" "gitea.stevedudenhoeffer.com/steve/majordomo" ) func main() { model := flag.String("model", "ollama-cloud/kimi-k2.6:cloud", "vision-capable model spec") path := flag.String("image", "", "path to a jpeg/png/gif image (required)") flag.Parse() if *path == "" { log.Fatal("usage: multimodal -image photo.jpg [-model spec]") } data, err := os.ReadFile(*path) if err != nil { log.Fatalf("read image: %v", err) } m, err := majordomo.Parse(*model) if err != nil { log.Fatalf("parse: %v", err) } // The declared MIME may even be wrong — the media pipeline sniffs the // bytes and corrects it. resp, err := m.Generate(context.Background(), majordomo.Request{ Messages: []majordomo.Message{majordomo.UserParts( majordomo.Text("Describe this image in two sentences."), majordomo.Image("image/jpeg", data), )}, }) if err != nil { log.Fatalf("generate: %v", err) } fmt.Printf("[%s] %s\n", resp.Model, resp.Text()) }