Moved the Knowledge struct and related types to the shared package, updating all references across the codebase. This improves modularity and enables better reuse of the Knowledge type across different components.
108 lines
2.4 KiB
Go
108 lines
2.4 KiB
Go
package agents
|
|
|
|
import (
	"context"
	"fmt"
	"io"
	"log/slog"
	"net/url"
	"os"
	"path/filepath"
	"strings"

	"github.com/asticode/go-astisub"
	"github.com/lrstanley/go-ytdlp"

	"gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared"
)
|
|
|
|
func init() {
|
|
ytdlp.MustInstall(context.Background(), nil)
|
|
}
|
|
|
|
func (a Agent) ReadYouTubeTranscript(ctx context.Context, u *url.URL, questions []string) (shared.Knowledge, error) {
|
|
dlp := ytdlp.New()
|
|
|
|
tmpDir, err := os.MkdirTemp("", "mort-ytdlp-")
|
|
if err != nil {
|
|
return shared.Knowledge{}, fmt.Errorf("error creating temp dir: %w", err)
|
|
}
|
|
|
|
slog.Info("created temp dir", "path", tmpDir)
|
|
defer func(path string) {
|
|
err := os.RemoveAll(path)
|
|
if err != nil {
|
|
slog.Error("error removing temp file", "error", err)
|
|
}
|
|
}(tmpDir)
|
|
|
|
subFile := filepath.Join(tmpDir, "subs")
|
|
dlp.
|
|
SkipDownload().
|
|
WriteAutoSubs().
|
|
Output(subFile)
|
|
|
|
res, err := dlp.Run(ctx, u.String())
|
|
if err != nil {
|
|
return shared.Knowledge{}, fmt.Errorf("error running yt-dlp: %w", err)
|
|
}
|
|
|
|
if res == nil {
|
|
return shared.Knowledge{}, fmt.Errorf("yt-dlp returned nil")
|
|
}
|
|
|
|
if res.ExitCode != 0 {
|
|
return shared.Knowledge{}, fmt.Errorf("yt-dlp exited with code %d", res.ExitCode)
|
|
}
|
|
|
|
// the transcript for this video now _should_ be at tmpDir/subs.en.vtt, however if it's not then just fine any
|
|
// vtt file in the directory
|
|
vttFile := filepath.Join(tmpDir, "subs.en.vtt")
|
|
|
|
_, err = os.Stat(vttFile)
|
|
if os.IsNotExist(err) {
|
|
vttFile = ""
|
|
files, err := os.ReadDir(tmpDir)
|
|
if err != nil {
|
|
return shared.Knowledge{}, fmt.Errorf("error reading directory: %w", err)
|
|
}
|
|
|
|
for _, file := range files {
|
|
if filepath.Ext(file.Name()) == ".vtt" {
|
|
vttFile = filepath.Join(tmpDir, file.Name())
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
if vttFile == "" {
|
|
return shared.Knowledge{}, fmt.Errorf("no vtt file found")
|
|
}
|
|
|
|
fp, err := os.Open(vttFile)
|
|
defer func(cl io.Closer) {
|
|
err := cl.Close()
|
|
if err != nil {
|
|
slog.Error("error closing file", "error", err)
|
|
}
|
|
}(fp)
|
|
if err != nil {
|
|
return shared.Knowledge{}, fmt.Errorf("error opening vtt file: %w", err)
|
|
}
|
|
|
|
subs, err := astisub.ReadFromWebVTT(fp)
|
|
if err != nil {
|
|
return shared.Knowledge{}, fmt.Errorf("error reading vtt file: %w", err)
|
|
}
|
|
|
|
if len(subs.Items) == 0 {
|
|
return shared.Knowledge{}, fmt.Errorf("no subtitles found")
|
|
}
|
|
|
|
var ts string
|
|
for _, item := range subs.Items {
|
|
ts += item.String() + "\n"
|
|
}
|
|
|
|
return a.ExtractKnowledge(ctx, ts, u.String(), questions)
|
|
}
|