package agents import ( "context" "fmt" "github.com/asticode/go-astisub" "github.com/lrstanley/go-ytdlp" "io" "log/slog" "net/url" "os" "path/filepath" ) func init() { ytdlp.MustInstall(context.Background(), nil) } func (a Agent) ReadYouTubeTranscript(ctx context.Context, u *url.URL, questions []string) (Knowledge, error) { dlp := ytdlp.New() tmpDir, err := os.MkdirTemp("", "mort-ytdlp-") if err != nil { return Knowledge{}, fmt.Errorf("error creating temp dir: %w", err) } slog.Info("created temp dir", "path", tmpDir) defer func(path string) { err := os.RemoveAll(path) if err != nil { slog.Error("error removing temp file", "error", err) } }(tmpDir) subFile := filepath.Join(tmpDir, "subs") dlp. SkipDownload(). WriteAutoSubs(). Output(subFile) res, err := dlp.Run(ctx, u.String()) if err != nil { return Knowledge{}, fmt.Errorf("error running yt-dlp: %w", err) } if res == nil { return Knowledge{}, fmt.Errorf("yt-dlp returned nil") } if res.ExitCode != 0 { return Knowledge{}, fmt.Errorf("yt-dlp exited with code %d", res.ExitCode) } // the transcript for this video now _should_ be at tmpDir/subs.en.vtt, however if it's not then just fine any // vtt file in the directory vttFile := filepath.Join(tmpDir, "subs.en.vtt") _, err = os.Stat(vttFile) if os.IsNotExist(err) { vttFile = "" files, err := os.ReadDir(tmpDir) if err != nil { return Knowledge{}, fmt.Errorf("error reading directory: %w", err) } for _, file := range files { if filepath.Ext(file.Name()) == ".vtt" { vttFile = filepath.Join(tmpDir, file.Name()) break } } } if vttFile == "" { return Knowledge{}, fmt.Errorf("no vtt file found") } fp, err := os.Open(vttFile) defer func(cl io.Closer) { err := cl.Close() if err != nil { slog.Error("error closing file", "error", err) } }(fp) if err != nil { return Knowledge{}, fmt.Errorf("error opening vtt file: %w", err) } subs, err := astisub.ReadFromWebVTT(fp) if err != nil { return Knowledge{}, fmt.Errorf("error reading vtt file: %w", err) } if len(subs.Items) == 0 { return Knowledge{}, fmt.Errorf("no subtitles found") } var ts string for _, item := range subs.Items { ts += item.String() + "\n" } return a.ExtractKnowledge(ctx, ts, u.String(), questions) }