Add YouTube transcript processing with yt-dlp integration

Introduced a new tool for extracting YouTube video transcripts and leveraging them to answer questions. Updated `SearchAndRead` to support reading YouTube transcripts and regular pages distinctly. Included relevant dependencies for handling subtitles and video downloads.
This commit is contained in:
Steve Dudenhoeffer 2025-03-27 02:18:16 -04:00
parent 5d2c350acf
commit 1aaed4ea28
4 changed files with 137 additions and 3 deletions

6
go.mod
View File

@ -29,12 +29,17 @@ require (
cloud.google.com/go/compute/metadata v0.6.0 // indirect
cloud.google.com/go/longrunning v0.6.6 // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/ProtonMail/go-crypto v1.1.5 // indirect
github.com/PuerkitoBio/goquery v1.10.2 // indirect
github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/antchfx/htmlquery v1.3.4 // indirect
github.com/antchfx/xmlquery v1.4.4 // indirect
github.com/antchfx/xpath v1.3.3 // indirect
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect
github.com/asticode/go-astikit v0.20.0 // indirect
github.com/asticode/go-astisub v0.34.0 // indirect
github.com/asticode/go-astits v1.8.0 // indirect
github.com/cloudflare/circl v1.6.0 // indirect
github.com/containerd/log v0.1.0 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
github.com/deckarep/golang-set/v2 v2.8.0 // indirect
@ -65,6 +70,7 @@ require (
github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 // indirect
github.com/kennygrant/sanitize v1.2.4 // indirect
github.com/liushuangls/go-anthropic/v2 v2.14.1 // indirect
github.com/lrstanley/go-ytdlp v0.0.0-20250326003011-0f33c19e522a // indirect
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/moby/term v0.5.2 // indirect

14
go.sum
View File

@ -25,6 +25,8 @@ github.com/Edw590/go-wolfram v0.0.0-20241010091529-fb9031908c5d h1:dxGZ0drmrUfNO
github.com/Edw590/go-wolfram v0.0.0-20241010091529-fb9031908c5d/go.mod h1:ubjYqrt3dF4G+YVEDQr+qa2aveeMzt27o/GOH2hswPo=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/ProtonMail/go-crypto v1.1.5 h1:eoAQfK2dwL+tFSFpr7TbOaPNUbPiJj4fLYwwGE1FQO4=
github.com/ProtonMail/go-crypto v1.1.5/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE=
github.com/PuerkitoBio/goquery v1.4.1/go.mod h1:T9ezsOHcCrDCgA8aF1Cqr3sSYbO/xgdy8/R/XiIMAhA=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
@ -56,12 +58,20 @@ github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwq
github.com/araddon/dateparse v0.0.0-20180729174819-cfd92a431d0e/go.mod h1:SLqhdZcd+dF3TEVL2RMoob5bBP5R1P1qkox+HtCBgGI=
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA=
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de/go.mod h1:DCaWoUhZrYW9p1lxo/cm8EmUOOzAPSEZNGF2DK1dJgw=
github.com/asticode/go-astikit v0.20.0 h1:+7N+J4E4lWx2QOkRdOf6DafWJMv6O4RRfgClwQokrH8=
github.com/asticode/go-astikit v0.20.0/go.mod h1:h4ly7idim1tNhaVkdVBeXQZEE3L0xblP7fCWbgwipF0=
github.com/asticode/go-astisub v0.34.0 h1:owKNj0A9pc7YVW/rNy2MJZ1mf0L8DTdklZVfyZDhTWI=
github.com/asticode/go-astisub v0.34.0/go.mod h1:WTkuSzFB+Bp7wezuSf2Oxulj5A8zu2zLRVFf6bIFQK8=
github.com/asticode/go-astits v1.8.0 h1:rf6aiiGn/QhlFjNON1n5plqF3Fs025XLUwiQ0NB6oZg=
github.com/asticode/go-astits v1.8.0/go.mod h1:DkOWmBNQpnr9mv24KfZjq4JawCFX1FCqjLVGvO0DygQ=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/chrisjoyce911/google-search v0.0.0-20230910003754-e501aedf805a h1:OZQiBySVd55npXVsIKnJT6q+9A1tPiXhGnFlc+q0YqQ=
github.com/chrisjoyce911/google-search v0.0.0-20230910003754-e501aedf805a/go.mod h1:fk5J/qPpaRDjLWdFxT+dmuiqG7kxXArC7K8A+gj88Nk=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cloudflare/circl v1.6.0 h1:cr5JKic4HI+LkINy2lg3W2jF8sHCVTBncJr5gIIq7qk=
github.com/cloudflare/circl v1.6.0/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
@ -167,6 +177,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/liushuangls/go-anthropic/v2 v2.14.1 h1:t07ckMN7qLkI4yIPJMPNjkwyLV6SEou6UHT/a4rpIHY=
github.com/liushuangls/go-anthropic/v2 v2.14.1/go.mod h1:HQ3//ql9jcgP6zpL5R11OkHijWuYVH1iwJSSF0x+Jlk=
github.com/lrstanley/go-ytdlp v0.0.0-20250326003011-0f33c19e522a h1:vAPYvcDbdlbH0XCNDsgybUld2ASBD1zAx+HEPQCbcOk=
github.com/lrstanley/go-ytdlp v0.0.0-20250326003011-0f33c19e522a/go.mod h1:HpxGaeaOpXVUPxUUmj8Izr3helrDGN90haPtmpY5xzA=
github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
@ -190,6 +202,7 @@ github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgr
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/profile v1.4.0/go.mod h1:NWz/XGvpEW1FyYQ7fCx4dqYBLlfTcE+A9FLAkNKqjFE=
github.com/playwright-community/playwright-go v0.5001.0 h1:EY3oB+rU9cUp6CLHguWE8VMZTwAg+83Yyb7dQqEmGLg=
github.com/playwright-community/playwright-go v0.5001.0/go.mod h1:kBNWs/w2aJ2ZUp1wEOOFLXgOqvppFngM5OS+qyhl+ZM=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@ -300,6 +313,7 @@ golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=

View File

@ -139,6 +139,11 @@ func (a Agent) SearchAndUseTools(ctx context.Context, searchQuery string, questi
slices.Sort(analyzed)
for j := len(analyzed) - 1; j >= 0; j-- {
v := analyzed[j]
if v < 0 || v >= len(searchResults) {
continue
}
searchResults = append(searchResults[:analyzed[j]], searchResults[analyzed[j]+1:]...)
}
@ -181,7 +186,6 @@ Use appropriate tools to analyze the search results and determine if they answer
var learned []Knowledge
for _, r := range results.CallResults {
if r.Error != nil {
slog.Error("error executing search function", "error", err)
continue
}
@ -208,9 +212,15 @@ Use appropriate tools to analyze the search results and determine if they answer
func (a Agent) SearchAndRead(ctx context.Context, searchQuery string, questions []string, allowConcurrent bool, maxReads int) (Knowledge, error) {
return a.SearchAndUseTools(ctx, searchQuery, questions, 2, allowConcurrent, maxReads, []SearchTool{
{
Name: "ReadPage",
Name: "readpage",
Description: "Read the search result and see if it answers the question. Try to avoid using this on low quality or spammy sites. You can use this function" + fmt.Sprint(maxReads) + " times, but do not call it multiple times on the same result.",
Function: a.ReadPage,
},
})
{
Name: "youtube",
Description: "Read the transcript to a youtube video and see if it answers the question. Try to avoid using this on low quality or spammy links. You can use this function" + fmt.Sprint(maxReads) + " times, but do not call it multiple times on the same result.",
Function: a.ReadYouTubeTranscript,
},
},
gollm.Message{Role: gollm.RoleSystem, Text: "For youtube links, only use the youtube tool. For other links, only use the readpage tool."})
}

104
pkg/agents/youtube.go Normal file
View File

@ -0,0 +1,104 @@
package agents
import (
	"context"
	"fmt"
	"io"
	"log/slog"
	"net/url"
	"os"
	"path/filepath"
	"strings"

	"github.com/asticode/go-astisub"
	"github.com/lrstanley/go-ytdlp"
)
// init runs ytdlp.MustInstall once at package load, using a background context
// and nil install options, so a yt-dlp binary is available before
// ReadYouTubeTranscript is called.
//
// NOTE(review): by the Must* naming convention this presumably panics if the
// install fails, and may touch the network/filesystem at start-up — confirm
// against the go-ytdlp documentation.
func init() {
	installCtx := context.Background()
	ytdlp.MustInstall(installCtx, nil)
}
// ReadYouTubeTranscript fetches the automatic subtitles for the video at u
// with yt-dlp (without downloading the video itself), flattens them into
// plain text, and hands that text to a.ExtractKnowledge together with
// questions.
//
// Subtitles are written into a fresh temporary directory that is removed
// before the function returns. An error is returned when u is nil, when the
// temporary directory cannot be created, when yt-dlp fails or exits non-zero,
// when no .vtt file is produced, or when the subtitle file cannot be opened,
// cannot be parsed, or contains no items.
func (a Agent) ReadYouTubeTranscript(ctx context.Context, u *url.URL, questions []string) (Knowledge, error) {
	// Guard against a nil URL: u.String() below would otherwise panic.
	if u == nil {
		return Knowledge{}, fmt.Errorf("nil url")
	}

	dlp := ytdlp.New()

	tmpDir, err := os.MkdirTemp("", "mort-ytdlp-")
	if err != nil {
		return Knowledge{}, fmt.Errorf("error creating temp dir: %w", err)
	}

	slog.Info("created temp dir", "path", tmpDir)
	defer func(path string) {
		err := os.RemoveAll(path)
		if err != nil {
			slog.Error("error removing temp file", "error", err)
		}
	}(tmpDir)

	// yt-dlp appends the language and extension to this output template.
	subFile := filepath.Join(tmpDir, "subs")
	dlp.
		SkipDownload().
		WriteAutoSubs().
		Output(subFile)

	res, err := dlp.Run(ctx, u.String())
	if err != nil {
		return Knowledge{}, fmt.Errorf("error running yt-dlp: %w", err)
	}

	if res == nil {
		return Knowledge{}, fmt.Errorf("yt-dlp returned nil")
	}

	if res.ExitCode != 0 {
		return Knowledge{}, fmt.Errorf("yt-dlp exited with code %d", res.ExitCode)
	}

	// the transcript for this video now _should_ be at tmpDir/subs.en.vtt, however if it's not then just find any
	// vtt file in the directory
	vttFile := filepath.Join(tmpDir, "subs.en.vtt")
	if _, err := os.Stat(vttFile); os.IsNotExist(err) {
		vttFile = ""
		files, err := os.ReadDir(tmpDir)
		if err != nil {
			return Knowledge{}, fmt.Errorf("error reading directory: %w", err)
		}

		for _, file := range files {
			if filepath.Ext(file.Name()) == ".vtt" {
				vttFile = filepath.Join(tmpDir, file.Name())
				break
			}
		}
	}

	if vttFile == "" {
		return Knowledge{}, fmt.Errorf("no vtt file found")
	}

	fp, err := os.Open(vttFile)
	if err != nil {
		return Knowledge{}, fmt.Errorf("error opening vtt file: %w", err)
	}
	// Register the close only after a successful open, so a failed open does
	// not also log a spurious close error on a nil file.
	defer func(cl io.Closer) {
		if err := cl.Close(); err != nil {
			slog.Error("error closing file", "error", err)
		}
	}(fp)

	subs, err := astisub.ReadFromWebVTT(fp)
	if err != nil {
		return Knowledge{}, fmt.Errorf("error reading vtt file: %w", err)
	}

	if len(subs.Items) == 0 {
		return Knowledge{}, fmt.Errorf("no subtitles found")
	}

	// Build the transcript with a Builder: transcripts can have thousands of
	// items and repeated string concatenation would be quadratic.
	var ts strings.Builder
	for _, item := range subs.Items {
		ts.WriteString(item.String())
		ts.WriteByte('\n')
	}

	return a.ExtractKnowledge(ctx, ts.String(), u.String(), questions)
}