Move defer statement to ensure browser closure occurs only after assigning the browser to the context. This prevents potential issues of premature resource release.
228 lines
7.2 KiB
Go
228 lines
7.2 KiB
Go
package agents
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"net/url"
|
|
"slices"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
|
|
gollm "gitea.stevedudenhoeffer.com/steve/go-llm"
|
|
)
|
|
|
|
// deferClose closes c and discards whatever error Close returns. A nil
// io.Closer is ignored, which lets callers write `defer deferClose(x)` without
// checking x first.
func deferClose(c io.Closer) {
	if c == nil {
		return
	}

	// Best-effort cleanup: the close error is deliberately dropped.
	_ = c.Close()
}
|
|
|
|
type SearchTool struct {
|
|
Name string
|
|
Description string
|
|
Function func(ctx context.Context, src *url.URL, questions []string) (Knowledge, error)
|
|
}
|
|
|
|
// SearchAndUseTools will search duckduckgo for the given question, and then ask the LLM to select a search result to
|
|
// analyze. The LLM will be given a list of tools to use to analyze the search result, and then the LLM will be asked to
|
|
// determine if the search results answers the question.
|
|
// If the context contains a "browser" key that is an extractor.Browser, it will use that browser to search, otherwise a
|
|
// new one will be created and used for the life of this search and then closed.
|
|
// searchQuery is the question to search for.
|
|
// questions is the list of questions that the LLM is trying to answer with the search results.
|
|
// loops is the number of times to ask the LLM to analyze results if there are remaining questions before giving up.
|
|
// readers is a list of functions that will be used to read the search results. Any knowledge gained from these readers
|
|
// will be combined and returned.
|
|
// messages will be appended to all search results. The types of messages that can be appended are both string and
|
|
// gollm.Message.
|
|
func (a Agent) SearchAndUseTools(ctx context.Context, searchQuery string, questions []string, loops int, allowConcurrent bool, maxReads int, tools []SearchTool, messages ...any) (Knowledge, error) {
|
|
var knowledge = Knowledge{
|
|
OriginalQuestions: questions,
|
|
RemainingQuestions: questions,
|
|
}
|
|
|
|
browser, ok := ctx.Value("browser").(extractor.Browser)
|
|
if !ok {
|
|
b, err := extractor.NewPlayWrightBrowser(extractor.PlayWrightBrowserOptions{})
|
|
if err != nil {
|
|
return knowledge, err
|
|
}
|
|
|
|
ctx = context.WithValue(ctx, "browser", b)
|
|
browser = b
|
|
defer deferClose(browser)
|
|
}
|
|
|
|
cfg := duckduckgo.Config{
|
|
SafeSearch: duckduckgo.SafeSearchOff,
|
|
Region: "us-en",
|
|
}
|
|
|
|
page, err := cfg.OpenSearch(ctx, browser, searchQuery)
|
|
defer deferClose(page)
|
|
if err != nil {
|
|
return knowledge, err
|
|
}
|
|
|
|
var searchResults []duckduckgo.Result
|
|
|
|
// filterResults will remove any search results that are in oldSearchResults, or are empty
|
|
filterResults := func(in []duckduckgo.Result) []duckduckgo.Result {
|
|
var res []duckduckgo.Result
|
|
for _, r := range in {
|
|
if r.URL == "" {
|
|
continue
|
|
}
|
|
|
|
res = append(res, r)
|
|
}
|
|
|
|
return res
|
|
}
|
|
|
|
if maxReads == 0 {
|
|
maxReads = 100
|
|
}
|
|
|
|
var lock sync.Mutex
|
|
var analyzed []int
|
|
var converted []gollm.Function
|
|
|
|
for _, t := range tools {
|
|
fn := gollm.NewFunction(t.Name, t.Description,
|
|
func(c *gollm.Context, arg struct {
|
|
Num int `description:"The # of search result to analyze."`
|
|
}) (any, error) {
|
|
i := arg.Num - 1
|
|
|
|
defer func() {
|
|
lock.Lock()
|
|
defer lock.Unlock()
|
|
analyzed = append(analyzed, i)
|
|
}()
|
|
|
|
if i < 0 || i >= len(searchResults) {
|
|
return nil, fmt.Errorf("index out of range: expect 1-%d", len(searchResults))
|
|
}
|
|
|
|
u, err := url.Parse(searchResults[i].URL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error parsing url: %w", err)
|
|
}
|
|
|
|
return t.Function(c.Context, u, questions)
|
|
})
|
|
converted = append(converted, fn)
|
|
}
|
|
|
|
for i := 0; i < loops; i++ {
|
|
// if any search results have already been analyzed, remove them
|
|
// but to make this easier, sort the list of analyzed results descending so they can be removed in order
|
|
// without changing the indexes of the remaining results
|
|
|
|
// but first remove any duplicates
|
|
var unique = map[int]struct{}{}
|
|
for _, v := range analyzed {
|
|
unique[v] = struct{}{}
|
|
}
|
|
|
|
analyzed = analyzed[:0]
|
|
for k := range unique {
|
|
analyzed = append(analyzed, k)
|
|
}
|
|
|
|
slices.Sort(analyzed)
|
|
for j := len(analyzed) - 1; j >= 0; j-- {
|
|
v := analyzed[j]
|
|
if v < 0 || v >= len(searchResults) {
|
|
continue
|
|
}
|
|
|
|
searchResults = append(searchResults[:analyzed[j]], searchResults[analyzed[j]+1:]...)
|
|
}
|
|
|
|
// remove any search results that have already been analyzed
|
|
analyzed = analyzed[:0]
|
|
|
|
_ = page.LoadMore()
|
|
time.Sleep(2 * time.Second)
|
|
|
|
searchResults = filterResults(page.GetResults())
|
|
|
|
a = a.WithSystemPrompt(`You are searching DuckDuckGo for the answer to the question that will be posed by the user. The search results will be provided in system messages in the format of: #. "https://url.here" - "Title of Page" - "Description here". For instance:
|
|
1. "https://example.com" - "Example Title" - "This is an example description."
|
|
2. "https://example2.com" - "Example Title 2" - "This is an example description 2."
|
|
|
|
Use appropriate tools to analyze the search results and determine if they answer the question.`).
|
|
WithSystemPromptSuffix(``).
|
|
WithToolbox(gollm.NewToolBox(converted...).WithRequireTool(true))
|
|
|
|
var searches = make([]string, len(searchResults))
|
|
for i, r := range searchResults {
|
|
searches[i] = fmt.Sprintf("%d. %q - %q - %q", i+1, r.URL, r.Title, r.Description)
|
|
}
|
|
|
|
if len(searches) > 0 {
|
|
messages = append(messages, "The search results are:\n"+strings.Join(searches, "\n"))
|
|
}
|
|
|
|
var results CallAndExecuteResults
|
|
if allowConcurrent {
|
|
results, err = a.CallAndExecuteParallel(ctx, messages...)
|
|
} else {
|
|
results, err = a.CallAndExecute(ctx, messages...)
|
|
}
|
|
|
|
if err != nil {
|
|
return knowledge, fmt.Errorf("error executing search function: %w", err)
|
|
}
|
|
slog.Info("search results called and executed", "error", err, "results text", results.Text, "results", results.CallResults)
|
|
|
|
var learned []Knowledge
|
|
for _, r := range results.CallResults {
|
|
if r.Error != nil {
|
|
continue
|
|
}
|
|
|
|
if k, ok := r.Result.(Knowledge); ok {
|
|
learned = append(learned, k)
|
|
} else {
|
|
slog.Error("result is not knowledge", "result", r.Result)
|
|
}
|
|
}
|
|
|
|
knowledge, err = a.KnowledgeIntegrate(ctx, knowledge, learned...)
|
|
if err != nil {
|
|
return knowledge, fmt.Errorf("error integrating knowledge: %w", err)
|
|
}
|
|
|
|
if len(knowledge.RemainingQuestions) == 0 {
|
|
return knowledge, nil
|
|
}
|
|
}
|
|
|
|
return knowledge, nil
|
|
}
|
|
|
|
func (a Agent) SearchAndRead(ctx context.Context, searchQuery string, questions []string, allowConcurrent bool, maxReads int) (Knowledge, error) {
|
|
return a.SearchAndUseTools(ctx, searchQuery, questions, 2, allowConcurrent, maxReads, []SearchTool{
|
|
{
|
|
Name: "readpage",
|
|
Description: "Read the search result and see if it answers the question. Try to avoid using this on low quality or spammy sites. You can use this function" + fmt.Sprint(maxReads) + " times, but do not call it multiple times on the same result.",
|
|
Function: a.ReadPage,
|
|
},
|
|
{
|
|
Name: "youtube",
|
|
Description: "Read the transcript to a youtube video and see if it answers the question. Try to avoid using this on low quality or spammy links. You can use this function" + fmt.Sprint(maxReads) + " times, but do not call it multiple times on the same result.",
|
|
Function: a.ReadYouTubeTranscript,
|
|
},
|
|
},
|
|
gollm.Message{Role: gollm.RoleSystem, Text: "For youtube links, only use the youtube tool. For other links, only use the readpage tool."})
|
|
}
|