Add YouTube transcript processing with yt-dlp integration

Introduced a new tool for extracting YouTube video transcripts and leveraging them to answer questions. Updated `SearchAndRead` to support reading YouTube transcripts and regular pages distinctly. Included relevant dependencies for handling subtitles and video downloads.
This commit is contained in:
2025-03-27 02:18:16 -04:00
parent 5d2c350acf
commit 1aaed4ea28
4 changed files with 137 additions and 3 deletions

View File

@@ -139,6 +139,11 @@ func (a Agent) SearchAndUseTools(ctx context.Context, searchQuery string, questi
slices.Sort(analyzed)
for j := len(analyzed) - 1; j >= 0; j-- {
v := analyzed[j]
if v < 0 || v >= len(searchResults) {
continue
}
searchResults = append(searchResults[:analyzed[j]], searchResults[analyzed[j]+1:]...)
}
@@ -181,7 +186,6 @@ Use appropriate tools to analyze the search results and determine if they answer
var learned []Knowledge
for _, r := range results.CallResults {
if r.Error != nil {
slog.Error("error executing search function", "error", err)
continue
}
@@ -208,9 +212,15 @@ Use appropriate tools to analyze the search results and determine if they answer
// SearchAndRead delegates to SearchAndUseTools, forwarding ctx, searchQuery,
// questions, allowConcurrent and maxReads, and supplying the tools the model
// may call on each search result: one backed by a.ReadPage and one backed by
// a.ReadYouTubeTranscript. A system message tells the model which tool to use
// for which kind of link.
//
// NOTE(review): this hunk is rendered without +/- markers. Going by the hunk
// header (9 old lines, 15 new lines), `Name: "ReadPage",` and the first `})`
// appear to be the removed side of the diff — confirm against the raw patch.
// NOTE(review): the meaning of the literal 2 passed to SearchAndUseTools is
// not visible here — confirm against its signature.
func (a Agent) SearchAndRead(ctx context.Context, searchQuery string, questions []string, allowConcurrent bool, maxReads int) (Knowledge, error) {
return a.SearchAndUseTools(ctx, searchQuery, questions, 2, allowConcurrent, maxReads, []SearchTool{
{
Name: "ReadPage",
Name: "readpage",
// NOTE(review): there is no space between "function" and the number, so the
// model sees e.g. "function5 times"; the youtube description below has the
// same issue.
Description: "Read the search result and see if it answers the question. Try to avoid using this on low quality or spammy sites. You can use this function" + fmt.Sprint(maxReads) + " times, but do not call it multiple times on the same result.",
Function: a.ReadPage,
},
})
{
Name: "youtube",
Description: "Read the transcript to a youtube video and see if it answers the question. Try to avoid using this on low quality or spammy links. You can use this function" + fmt.Sprint(maxReads) + " times, but do not call it multiple times on the same result.",
Function: a.ReadYouTubeTranscript,
},
},
// The tool names in this message must match the Name fields above.
gollm.Message{Role: gollm.RoleSystem, Text: "For youtube links, only use the youtube tool. For other links, only use the readpage tool."})
}

104
pkg/agents/youtube.go Normal file
View File

@@ -0,0 +1,104 @@
package agents
import (
"context"
"fmt"
"github.com/asticode/go-astisub"
"github.com/lrstanley/go-ytdlp"
"io"
"log/slog"
"net/url"
"os"
"path/filepath"
)
// init runs ytdlp.MustInstall once at package load so that a yt-dlp binary is
// in place before ReadYouTubeTranscript is called.
//
// NOTE(review): as a Must-style helper this presumably panics when the install
// fails, which would abort program start-up — confirm that is acceptable.
func init() {
	ctx := context.Background()
	ytdlp.MustInstall(ctx, nil)
}
// ReadYouTubeTranscript fetches the auto-generated subtitles of the video at u
// with yt-dlp, flattens them into plain text (one cue per line) and hands that
// text to ExtractKnowledge together with questions.
//
// The subtitles are written into a temporary directory that is removed again
// before the function returns. An error is returned when u is nil, when yt-dlp
// fails or exits non-zero, when no WebVTT file was produced, or when the file
// cannot be opened, cannot be parsed, or contains no cues.
func (a Agent) ReadYouTubeTranscript(ctx context.Context, u *url.URL, questions []string) (Knowledge, error) {
	// Guard against a nil URL; u.String() below would otherwise panic.
	if u == nil {
		return Knowledge{}, fmt.Errorf("no url provided")
	}

	dlp := ytdlp.New()

	tmpDir, err := os.MkdirTemp("", "mort-ytdlp-")
	if err != nil {
		return Knowledge{}, fmt.Errorf("error creating temp dir: %w", err)
	}

	slog.Info("created temp dir", "path", tmpDir)

	defer func(path string) {
		err := os.RemoveAll(path)
		if err != nil {
			slog.Error("error removing temp file", "error", err)
		}
	}(tmpDir)

	subFile := filepath.Join(tmpDir, "subs")

	// Only the subtitles are wanted: skip the media download and write the
	// auto-generated captions using subFile as the output template.
	dlp.
		SkipDownload().
		WriteAutoSubs().
		Output(subFile)

	res, err := dlp.Run(ctx, u.String())
	if err != nil {
		return Knowledge{}, fmt.Errorf("error running yt-dlp: %w", err)
	}

	if res == nil {
		return Knowledge{}, fmt.Errorf("yt-dlp returned nil")
	}

	if res.ExitCode != 0 {
		return Knowledge{}, fmt.Errorf("yt-dlp exited with code %d", res.ExitCode)
	}

	// The transcript for this video now _should_ be at tmpDir/subs.en.vtt;
	// if it is not, fall back to any .vtt file in the directory.
	vttFile := filepath.Join(tmpDir, "subs.en.vtt")
	if _, err := os.Stat(vttFile); err != nil {
		// Anything other than "does not exist" is a real failure and is
		// reported instead of being silently ignored.
		if !os.IsNotExist(err) {
			return Knowledge{}, fmt.Errorf("error checking vtt file: %w", err)
		}

		vttFile = ""
		files, err := os.ReadDir(tmpDir)
		if err != nil {
			return Knowledge{}, fmt.Errorf("error reading directory: %w", err)
		}

		for _, file := range files {
			// Skip directories so that only a regular entry can be picked.
			if !file.IsDir() && filepath.Ext(file.Name()) == ".vtt" {
				vttFile = filepath.Join(tmpDir, file.Name())
				break
			}
		}
	}

	if vttFile == "" {
		return Knowledge{}, fmt.Errorf("no vtt file found")
	}

	fp, err := os.Open(vttFile)
	if err != nil {
		return Knowledge{}, fmt.Errorf("error opening vtt file: %w", err)
	}
	// Register the close only after Open succeeded, so a failed open does not
	// additionally log a bogus close error for a nil file.
	defer func(cl io.Closer) {
		err := cl.Close()
		if err != nil {
			slog.Error("error closing file", "error", err)
		}
	}(fp)

	subs, err := astisub.ReadFromWebVTT(fp)
	if err != nil {
		return Knowledge{}, fmt.Errorf("error reading vtt file: %w", err)
	}

	if len(subs.Items) == 0 {
		return Knowledge{}, fmt.Errorf("no subtitles found")
	}

	// Accumulate into a byte slice instead of using string += in the loop,
	// which would re-copy the whole transcript for every cue.
	var transcript []byte
	for _, item := range subs.Items {
		transcript = append(transcript, item.String()...)
		transcript = append(transcript, '\n')
	}

	return a.ExtractKnowledge(ctx, string(transcript), u.String(), questions)
}