package agents

import (
	"context"
	"fmt"
	"io"
	"log/slog"
	"net/url"
	"slices"
	"strings"
	"sync"
	"time"

	"gitea.stevedudenhoeffer.com/steve/go-extractor"
	"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
	gollm "gitea.stevedudenhoeffer.com/steve/go-llm"
)

// deferClose closes c when it is non-nil and discards the error returned by
// Close. It exists so callers can write `defer deferClose(x)` without having
// to check x first.
func deferClose(c io.Closer) {
	if c == nil {
		return
	}

	// The close error is deliberately dropped: this helper only runs during cleanup.
	_ = c.Close()
}

// SearchTool describes one tool the LLM may invoke on a search result.
type SearchTool struct {
	// Name is the function name exposed to the LLM.
	Name string

	// Description is the function description exposed to the LLM.
	Description string

	// Function is called with the URL of the search result the LLM selected and
	// the questions being answered; it returns whatever Knowledge it gathered.
	Function func(ctx context.Context, src *url.URL, questions []string) (Knowledge, error)
}

// SearchAndUseTools searches DuckDuckGo for searchQuery and then asks the LLM to pick search results to analyze with
// the supplied tools, integrating whatever Knowledge those tools return.
//
// If ctx carries a value under the key "browser" that is an extractor.Browser, that browser is used for the search.
// Otherwise a new one is created, used for the life of this search, and closed before returning.
//
//   - searchQuery is the text to search for.
//   - questions is the list of questions the LLM is trying to answer with the search results.
//   - loops is the maximum number of rounds in which the LLM is asked to analyze results. The function returns early
//     once no questions remain.
//   - allowConcurrent selects CallAndExecuteParallel instead of CallAndExecute for running the LLM's tool calls.
//   - maxReads is not enforced by this function.
//   - tools are offered to the LLM as functions taking the number of the search result to analyze. Knowledge returned
//     by the tools is combined and returned.
//   - messages are sent to the LLM along with the search results. Both string and gollm.Message values may be passed.
func (a Agent) SearchAndUseTools(ctx context.Context, searchQuery string, questions []string, loops int, allowConcurrent bool, maxReads int, tools []SearchTool, messages ...any) (Knowledge, error) { var knowledge = Knowledge{ OriginalQuestions: questions, RemainingQuestions: questions, } browser, ok := ctx.Value("browser").(extractor.Browser) if !ok { b, err := extractor.NewPlayWrightBrowser(extractor.PlayWrightBrowserOptions{}) if err != nil { return knowledge, err } ctx = context.WithValue(ctx, "browser", b) browser = b defer deferClose(browser) } cfg := duckduckgo.Config{ SafeSearch: duckduckgo.SafeSearchOff, Region: "us-en", } page, err := cfg.OpenSearch(ctx, browser, searchQuery) defer deferClose(page) if err != nil { return knowledge, err } var searchResults []duckduckgo.Result // filterResults will remove any search results that are in oldSearchResults, or are empty filterResults := func(in []duckduckgo.Result) []duckduckgo.Result { var res []duckduckgo.Result for _, r := range in { if r.URL == "" { continue } res = append(res, r) } return res } if maxReads == 0 { maxReads = 100 } var lock sync.Mutex var analyzed []int var converted []gollm.Function for _, t := range tools { fn := gollm.NewFunction(t.Name, t.Description, func(c *gollm.Context, arg struct { Num int `description:"The # of search result to analyze."` }) (any, error) { i := arg.Num - 1 defer func() { lock.Lock() defer lock.Unlock() analyzed = append(analyzed, i) }() if i < 0 || i >= len(searchResults) { return nil, fmt.Errorf("index out of range: expect 1-%d", len(searchResults)) } u, err := url.Parse(searchResults[i].URL) if err != nil { return nil, fmt.Errorf("error parsing url: %w", err) } return t.Function(c.Context, u, questions) }) converted = append(converted, fn) } for i := 0; i < loops; i++ { // if any search results have already been analyzed, remove them // but to make this easier, sort the list of analyzed results descending so they can be removed in order // without changing the 
indexes of the remaining results // but first remove any duplicates var unique = map[int]struct{}{} for _, v := range analyzed { unique[v] = struct{}{} } analyzed = analyzed[:0] for k := range unique { analyzed = append(analyzed, k) } slices.Sort(analyzed) for j := len(analyzed) - 1; j >= 0; j-- { v := analyzed[j] if v < 0 || v >= len(searchResults) { continue } searchResults = append(searchResults[:analyzed[j]], searchResults[analyzed[j]+1:]...) } // remove any search results that have already been analyzed analyzed = analyzed[:0] _ = page.LoadMore() time.Sleep(2 * time.Second) searchResults = filterResults(page.GetResults()) a = a.WithSystemPrompt(`You are searching DuckDuckGo for the answer to the question that will be posed by the user. The search results will be provided in system messages in the format of: #. "https://url.here" - "Title of Page" - "Description here". For instance: 1. "https://example.com" - "Example Title" - "This is an example description." 2. "https://example2.com" - "Example Title 2" - "This is an example description 2." Use appropriate tools to analyze the search results and determine if they answer the question.`). WithSystemPromptSuffix(``). WithToolbox(gollm.NewToolBox(converted...).WithRequireTool(true)) var searches = make([]string, len(searchResults)) for i, r := range searchResults { searches[i] = fmt.Sprintf("%d. %q - %q - %q", i+1, r.URL, r.Title, r.Description) } if len(searches) > 0 { messages = append(messages, "The search results are:\n"+strings.Join(searches, "\n")) } var results CallAndExecuteResults if allowConcurrent { results, err = a.CallAndExecuteParallel(ctx, messages...) } else { results, err = a.CallAndExecute(ctx, messages...) 
} if err != nil { return knowledge, fmt.Errorf("error executing search function: %w", err) } slog.Info("search results called and executed", "error", err, "results text", results.Text, "results", results.CallResults) var learned []Knowledge for _, r := range results.CallResults { if r.Error != nil { continue } if k, ok := r.Result.(Knowledge); ok { learned = append(learned, k) } else { slog.Error("result is not knowledge", "result", r.Result) } } knowledge, err = a.KnowledgeIntegrate(ctx, knowledge, learned...) if err != nil { return knowledge, fmt.Errorf("error integrating knowledge: %w", err) } if len(knowledge.RemainingQuestions) == 0 { return knowledge, nil } } return knowledge, nil } func (a Agent) SearchAndRead(ctx context.Context, searchQuery string, questions []string, allowConcurrent bool, maxReads int) (Knowledge, error) { return a.SearchAndUseTools(ctx, searchQuery, questions, 2, allowConcurrent, maxReads, []SearchTool{ { Name: "readpage", Description: "Read the search result and see if it answers the question. Try to avoid using this on low quality or spammy sites. You can use this function" + fmt.Sprint(maxReads) + " times, but do not call it multiple times on the same result.", Function: a.ReadPage, }, { Name: "youtube", Description: "Read the transcript to a youtube video and see if it answers the question. Try to avoid using this on low quality or spammy links. You can use this function" + fmt.Sprint(maxReads) + " times, but do not call it multiple times on the same result.", Function: a.ReadYouTubeTranscript, }, }, gollm.Message{Role: gollm.RoleSystem, Text: "For youtube links, only use the youtube tool. For other links, only use the readpage tool."}) }