answer/pkg/agents/youtube.go
Steve Dudenhoeffer 1c3ea7d1f1 Refactor Knowledge struct into shared package
Moved the Knowledge struct and related types to the shared package, updating all references across the codebase. This improves modularity and enables better reuse of the Knowledge type across different components.
2025-05-03 22:09:02 -04:00

108 lines
2.4 KiB
Go

package agents
import (
	"context"
	"fmt"
	"io"
	"log/slog"
	"net/url"
	"os"
	"path/filepath"
	"strings"

	"gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared"
	"github.com/asticode/go-astisub"
	"github.com/lrstanley/go-ytdlp"
)
// init runs ytdlp.MustInstall once at package load, using a background
// context and nil options, so the yt-dlp tooling is set up before
// ReadYouTubeTranscript is first called.
//
// NOTE(review): by Go's Must-prefix convention this presumably panics if the
// install fails — confirm against the go-ytdlp documentation.
func init() {
	installCtx := context.Background()
	ytdlp.MustInstall(installCtx, nil)
}
// ReadYouTubeTranscript downloads the subtitles for the video at u using
// yt-dlp, flattens them into plain text, and passes that text (together with
// the URL string and questions) to a.ExtractKnowledge, returning its result.
//
// The subtitles are written into a fresh temporary directory that is removed
// again before the function returns.
//
// Parameters:
//   - ctx: passed to the yt-dlp run and to ExtractKnowledge.
//   - u: the video URL; must not be nil.
//   - questions: forwarded unchanged to ExtractKnowledge.
//
// An error is returned when u is nil, the temp dir cannot be created, yt-dlp
// fails or exits non-zero, no .vtt file is produced, the file cannot be opened
// or parsed, or the parsed file contains no subtitle items.
func (a Agent) ReadYouTubeTranscript(ctx context.Context, u *url.URL, questions []string) (shared.Knowledge, error) {
	// Guard against a nil URL up front; u.String() below would otherwise panic.
	if u == nil {
		return shared.Knowledge{}, fmt.Errorf("nil url")
	}

	tmpDir, err := os.MkdirTemp("", "mort-ytdlp-")
	if err != nil {
		return shared.Knowledge{}, fmt.Errorf("error creating temp dir: %w", err)
	}

	slog.Info("created temp dir", "path", tmpDir)

	defer func(path string) {
		err := os.RemoveAll(path)
		if err != nil {
			slog.Error("error removing temp file", "error", err)
		}
	}(tmpDir)

	subFile := filepath.Join(tmpDir, "subs")

	// Only the (auto-generated) subtitles are wanted, not the video itself.
	dlp := ytdlp.New().
		SkipDownload().
		WriteAutoSubs().
		Output(subFile)

	res, err := dlp.Run(ctx, u.String())
	if err != nil {
		return shared.Knowledge{}, fmt.Errorf("error running yt-dlp: %w", err)
	}

	if res == nil {
		return shared.Knowledge{}, fmt.Errorf("yt-dlp returned nil")
	}

	if res.ExitCode != 0 {
		return shared.Knowledge{}, fmt.Errorf("yt-dlp exited with code %d", res.ExitCode)
	}

	// the transcript for this video now _should_ be at tmpDir/subs.en.vtt, however if it's not then just find any
	// vtt file in the directory
	vttFile := filepath.Join(tmpDir, "subs.en.vtt")

	_, err = os.Stat(vttFile)
	if os.IsNotExist(err) {
		vttFile = ""

		files, err := os.ReadDir(tmpDir)
		if err != nil {
			return shared.Knowledge{}, fmt.Errorf("error reading directory: %w", err)
		}

		for _, file := range files {
			// A directory that merely ends in ".vtt" is not a subtitle file.
			if file.IsDir() {
				continue
			}

			if filepath.Ext(file.Name()) == ".vtt" {
				vttFile = filepath.Join(tmpDir, file.Name())
				break
			}
		}
	}

	if vttFile == "" {
		return shared.Knowledge{}, fmt.Errorf("no vtt file found")
	}

	fp, err := os.Open(vttFile)
	if err != nil {
		return shared.Knowledge{}, fmt.Errorf("error opening vtt file: %w", err)
	}

	// Register the close only once the open is known to have succeeded, so a
	// failed open does not also log a bogus close error.
	defer func(cl io.Closer) {
		err := cl.Close()
		if err != nil {
			slog.Error("error closing file", "error", err)
		}
	}(fp)

	subs, err := astisub.ReadFromWebVTT(fp)
	if err != nil {
		return shared.Knowledge{}, fmt.Errorf("error reading vtt file: %w", err)
	}

	if len(subs.Items) == 0 {
		return shared.Knowledge{}, fmt.Errorf("no subtitles found")
	}

	// Join every subtitle item, one per line. A builder avoids the quadratic
	// copying that repeated string concatenation would incur on long videos.
	var ts strings.Builder
	for _, item := range subs.Items {
		ts.WriteString(item.String())
		ts.WriteByte('\n')
	}

	return a.ExtractKnowledge(ctx, ts.String(), u.String(), questions)
}