// Package searcher provides an Agent that runs a DuckDuckGo search, lets a chat completion model pick which results
// to read, and merges what was learned from reading them.
package searcher

import (
	"context"
	"fmt"
	"log/slog"
	"net/url"
	"strings"
	"sync"
	"time"

	"gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/reader"
	"gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared"
	"gitea.stevedudenhoeffer.com/steve/go-extractor"
	"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
	gollm "gitea.stevedudenhoeffer.com/steve/go-llm"
)

// Result describes an answer to a question, the sources it came from, and whatever part of the question is still
// unanswered.
type Result struct {
	// Answer is the answer to the question that was asked.
	Answer string

	// Sources is a list of sources that were used to find the answer.
	Sources []string

	// Remaining is the remaining part(s) of the question that was not answered.
	Remaining string
}

// Agent searches DuckDuckGo and reads the results that its model selects. See Search.
type Agent struct {
	// Model is the chat completion model to use. Search uses it to choose which results to read, and hands it to the
	// reader agent that reads each chosen page.
	Model gollm.ChatCompletion

	// OnDone, when non-nil, is called by Search with the accumulated knowledge once all selected results have been
	// read. A non-nil error from OnDone is returned by Search.
	OnDone func(ctx context.Context, knowledge shared.Knowledge) error

	// MaxReads is the maximum number of pages that can be read by the agent. Search substitutes a default of 100
	// when this is 0.
	MaxReads int

	// ContextualInformation is extra background that Search adds to the model's system prompt and forwards to the
	// reader agent for every page it reads.
	ContextualInformation []string

	// AllowConcurrent is not consulted by Search; selected results are always read concurrently there.
	// NOTE(review): confirm whether this flag is meant to gate that concurrency.
	AllowConcurrent bool
}

// Search will search duckduckgo for the given question, and then read the results to figure out the answer.
// searchQuery is the query that you want to search for, e.g. "what is the capital of France site:reddit.com"
// question is the question that you are trying to answer when reading the search results.
// If the context contains a "browser" key that is an extractor.Browser, it will use that browser to search, otherwise a
// new one will be created and used for the life of this search and then closed.
func (a Agent) Search(ctx context.Context, searchQuery string, question string) (shared.Knowledge, error) { var knowledge = shared.Knowledge{ OriginalQuestions: []string{question}, RemainingQuestions: []string{question}, } browser, ok := ctx.Value("browser").(extractor.Browser) if !ok { b, err := extractor.NewPlayWrightBrowser(extractor.PlayWrightBrowserOptions{}) if err != nil { return knowledge, err } defer deferClose(browser) ctx = context.WithValue(ctx, "browser", b) browser = b } cfg := duckduckgo.Config{ SafeSearch: duckduckgo.SafeSearchOff, Region: "us-en", } page, err := cfg.OpenSearch(ctx, browser, searchQuery) defer deferClose(page) if err != nil { return knowledge, err } var searchResults []duckduckgo.Result // filterResults will remove any search results that are in oldSearchResults, or are empty filterResults := func(in []duckduckgo.Result) []duckduckgo.Result { var res []duckduckgo.Result for _, r := range in { if r.URL == "" { continue } res = append(res, r) } return res } _ = page.LoadMore() time.Sleep(2 * time.Second) searchResults = filterResults(page.GetResults()) var toRead = make(chan int, a.MaxReads) fnReadSearchResult := gollm.NewFunction("read", "read the search result and see if it answers the question", func(c *gollm.Context, arg struct { Num int `description:"The # of the search result to read."` }) (string, error) { toRead <- arg.Num - 1 return "ok", nil }) readSource := func(ctx context.Context, src duckduckgo.Result) (shared.Knowledge, error) { r := reader.Agent{ Model: a.Model, ContextualInformation: a.ContextualInformation, } u, err := url.Parse(src.URL) if err != nil { return shared.Knowledge{}, err } slog.Info("reading search result", "url", u) response, err := r.Read(ctx, question, u) if err != nil { return shared.Knowledge{}, err } return response, nil } tools := gollm.NewToolBox(fnReadSearchResult) var req = gollm.Request{ Toolbox: tools, } req.Messages = append(req.Messages, gollm.Message{ Role: gollm.RoleSystem, Text: `You are 
searching DuckDuckGo for the answer to the question that will be posed by the user. The search results will be provided in system messages in the format of: #. "https://url.here" - "Title of Page" - "Description here". For instance: 1. "https://example.com" - "Example Title" - "This is an example description." 2. "https://example2.com" - "Example Title 2" - "This is an example description 2."`, }) if a.MaxReads == 0 { a.MaxReads = 100 } req.Messages = append(req.Messages, gollm.Message{ Role: gollm.RoleSystem, Text: fmt.Sprintf(`You can read a search result by using the function "read_search_result" with the # of the page to read, it will attempt to read the page, and then an LLM will read the page and see if it answers the question. can call read_search_result multiple times, up to %d times. All sources you read will be evaulated to see if they answer the question in full or at least in part.`, a.MaxReads), }) if len(a.ContextualInformation) > 0 { req.Messages = append(req.Messages, gollm.Message{ Role: gollm.RoleSystem, Text: "Some contextual information you should be aware of: " + strings.Join(a.ContextualInformation, "\n"), }) } searches := "" for i, r := range searchResults { if i > 0 { searches += "\n" } searches += fmt.Sprintf("%d. 
%q - %q - %q", i+1, r.URL, r.Title, r.Description) } req.Messages = append(req.Messages, gollm.Message{ Role: gollm.RoleSystem, Text: "Search results are:\n" + searches, }) results, err := a.Model.ChatComplete(ctx, req) if err != nil { return knowledge, err } if len(results.Choices) == 0 { return knowledge, fmt.Errorf("no choices were returned") } choice := results.Choices[0] // enforce the maximum number of reads calls := choice.Calls if len(calls) > a.MaxReads { slog.Warn("too many calls, trimming to max", "len", len(calls), "max", a.MaxReads) calls = calls[:a.MaxReads] } _, err = tools.ExecuteCallbacks(gollm.NewContext(ctx, req, &choice, nil), choice.Calls, nil, nil) if err != nil { return knowledge, err } close(toRead) // make sure there are no duplicates var uniques = map[int]struct{}{} for i := range toRead { uniques[i] = struct{}{} } var sources []duckduckgo.Result for k := range uniques { if k < 0 || k >= len(searchResults) { slog.Warn("search result index out of range", "index", k, "len", len(searchResults)) continue } sources = append(sources, searchResults[k]) } type result struct { Knowledge shared.Knowledge Err error } var gainedKnowledge = make(chan result, len(sources)) wg := sync.WaitGroup{} for _, v := range sources { wg.Add(1) go func() { res, err := readSource(ctx, v) slog.Info("read search result", "url", v.URL, "err", err) gainedKnowledge <- result{Knowledge: res, Err: err} wg.Done() }() } slog.Info("reading search results", "len", len(sources)) wg.Wait() close(gainedKnowledge) slog.Info("done reading search results", "len", len(gainedKnowledge)) for r := range gainedKnowledge { if r.Err != nil { slog.Info("error reading search result", "err", r.Err) continue } knowledge.Knowledge = append(knowledge.Knowledge, r.Knowledge.Knowledge...) knowledge.RemainingQuestions = append(knowledge.RemainingQuestions, r.Knowledge.RemainingQuestions...) 
} if a.OnDone != nil { err := a.OnDone(ctx, knowledge) if err != nil { return knowledge, err } } return knowledge, nil }