diff --git a/cmd/agent/cmd.go b/cmd/agent/cmd.go index f1ae581..3093def 100644 --- a/cmd/agent/cmd.go +++ b/cmd/agent/cmd.go @@ -13,8 +13,6 @@ import ( "gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared" - "gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo" - "gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/searcher" "github.com/joho/godotenv" @@ -106,24 +104,12 @@ func main() { search := searcher.Agent{ Model: m, - OnGoingToNextPage: func(ctx context.Context) error { - slog.Info("going to next page") - return nil - }, - OnReadingSearchResult: func(ctx context.Context, sr duckduckgo.Result) (any, error) { - slog.Info("reading search result", "url", sr.URL, "title", sr.Title, "description", sr.Description) - return nil, nil - }, - OnFinishedReadingSearchResult: func(ctx context.Context, sr duckduckgo.Result, newKnowledge []string, err error, onReadingResult any) error { - slog.Info("finished reading search result", "err", err, "newKnowledge", newKnowledge) - return nil - }, + OnDone: func(ctx context.Context, knowledge shared.Knowledge) error { slog.Info("done", "knowledge", knowledge) return nil }, - MaxReads: 20, - MaxNextResults: 10, + MaxReads: 20, } processor := knowledge2.KnowledgeProcessor{Model: m} @@ -135,7 +121,7 @@ func main() { slog.Info("knowledge", "knowledge", knowledge) - sum, err := processor.ProcessKnowledge(ctx, knowledge) + sum, err := processor.Process(ctx, knowledge) fmt.Println(sum) return nil diff --git a/go.mod b/go.mod index 565116e..16dd17a 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ replace github.com/rocketlaunchr/google-search => github.com/chrisjoyce911/googl require ( gitea.stevedudenhoeffer.com/steve/go-extractor v0.0.0-20250318064250-39453288ce2a - gitea.stevedudenhoeffer.com/steve/go-llm v0.0.0-20250318074538-52533238d385 + gitea.stevedudenhoeffer.com/steve/go-llm v0.0.0-20250321150932-5ba42056adfc github.com/Edw590/go-wolfram v0.0.0-20241010091529-fb9031908c5d github.com/advancedlogic/GoOse v0.0.0-20231203033844-ae6b36caf275 github.com/davecgh/go-spew v1.1.1 diff --git a/pkg/agents/searcher/agent.go b/pkg/agents/searcher/agent.go index 740cbf9..e5b22d7 100644 --- a/pkg/agents/searcher/agent.go +++ b/pkg/agents/searcher/agent.go @@ -6,6 +6,8 @@ import ( "log/slog" "net/url" "strings" + "sync" + "time" "gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/reader" "gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared" @@ -14,9 +16,6 @@ import ( gollm "gitea.stevedudenhoeffer.com/steve/go-llm" ) -// kMaxRuns is the maximum number of calls into the LLM this agent will make. -const kMaxRuns = 30 - type Result struct { // Answer is the answer to the question that was asked. Answer string @@ -32,29 +31,14 @@ type Agent struct { // Model is the chat completion model to use Model gollm.ChatCompletion - // OnGoingToNextPage is called when the agent is going to the next page - OnGoingToNextPage func(ctx context.Context) error - - // OnReadingSearchResult is called when the agent is reading a search result - // url is the URL of the search result that is being read. - // The return value is any data that you want to pass to OnFinishedReadingSearchResult. - OnReadingSearchResult func(ctx context.Context, searchResult duckduckgo.Result) (any, error) - - // OnFinishedReadingSearchResult is called when the agent is finished reading a search result. - // newKnowledge is the knowledge that was gained from reading the search result. - // err is any error that occurred while reading the search result. - // onReadingResult is the result of the OnReadingSearchResult function from the same search result. - OnFinishedReadingSearchResult func(ctx context.Context, searchResult duckduckgo.Result, newKnowledge []string, err error, onReadingResult any) error - OnDone func(ctx context.Context, knowledge shared.Knowledge) error // MaxReads is the maximum number of pages that can be read by the agent. Unlimited if <= 0. MaxReads int - // MaxNextResults is the maximum number of times that the next_results function can be called. Unlimited if <= 0. - MaxNextResults int - ContextualInformation []string + + AllowConcurrent bool } // Search will search duckduckgo for the given question, and then read the results to figure out the answer. @@ -67,7 +51,6 @@ func (a Agent) Search(ctx context.Context, searchQuery string, question string) OriginalQuestions: []string{question}, RemainingQuestions: []string{question}, } - var done = false browser, ok := ctx.Value("browser").(extractor.Browser) if !ok { @@ -87,15 +70,11 @@ func (a Agent) Search(ctx context.Context, searchQuery string, question string) } page, err := cfg.OpenSearch(ctx, browser, searchQuery) + defer deferClose(page) if err != nil { return knowledge, err } - defer deferClose(page) - - var numberOfReads int - var numberOfNextResults int - var searchResults []duckduckgo.Result // filterResults will remove any search results that are in oldSearchResults, or are empty @@ -112,177 +91,160 @@ func (a Agent) Search(ctx context.Context, searchQuery string, question string) return res } + _ = page.LoadMore() + time.Sleep(2 * time.Second) + searchResults = filterResults(page.GetResults()) - fnNextResults := gollm.NewFunction( - "next_results", - "get the next page of search results", - func(c *gollm.Context, - arg struct { - Ignored string `description:"This is ignored, only included for API requirements."` - }) (string, error) { - if numberOfNextResults >= a.MaxNextResults && a.MaxNextResults > 0 { - return "Max next results reached", nil - } + var toRead = make(chan int, a.MaxReads) - numberOfNextResults++ - - searchResults = append(searchResults, filterResults(page.GetResults())...) - - // clamp it to the 30 most recent results - if len(searchResults) > 30 { - // remove the first len(searchResults) - 30 elements - searchResults = searchResults[len(searchResults)-30:] - } - return "Got more search results", nil - }, - ) - - fnReadSearchResult := gollm.NewFunction( - "read", - "go to the next page of search results", + fnReadSearchResult := gollm.NewFunction("read", + "read the search result and see if it answers the question", func(c *gollm.Context, arg struct { Num int `description:"The # of the search result to read."` }) (string, error) { - if numberOfReads >= a.MaxReads && a.MaxReads > 0 { - return "Max reads reached", nil - } - - numberOfReads++ - - r := reader.Agent{ - Model: a.Model, - ContextualInformation: a.ContextualInformation, - } - - // num is 1 based, we need 0 based - num := arg.Num - 1 - - // now ensure bounds are good - if num < 0 || num >= len(searchResults) { - return "", fmt.Errorf("search result %d is out of bounds", num) - } - - sr := searchResults[num] - - // remove that search result from the list - searchResults = append(searchResults[:num], searchResults[num+1:]...) - - u, err := url.Parse(sr.URL) - if err != nil { - return "", err - } - - var onReadingResult any - if a.OnReadingSearchResult != nil { - onReadingResult, err = a.OnReadingSearchResult(ctx, sr) - if err != nil { - return "", err - } - - } - - response, err := r.Read(c, question, u) - if err != nil { - return "", err - } - - if a.OnFinishedReadingSearchResult != nil { - var newKnowledge []string - - for _, k := range response.Knowledge { - newKnowledge = append(newKnowledge, k.Info) - } - err = a.OnFinishedReadingSearchResult(ctx, sr, newKnowledge, err, onReadingResult) - if err != nil { - return "", err - } - } - - slog.Info("read finished", "url", u, "knowledge gained", response.Knowledge, "remaining", response.RemainingQuestions) - - knowledge.Knowledge = append(knowledge.Knowledge, response.Knowledge...) - knowledge.RemainingQuestions = response.RemainingQuestions - + toRead <- arg.Num - 1 return "ok", nil }) - fnDone := gollm.NewFunction( - "done", - "finish reading search results", - func(c *gollm.Context, arg struct { - Ignored string `description:"This is ignored, only included for API requirements."` - }) (string, error) { - done = true - return "ok", nil - }) - - for i := 0; i < kMaxRuns && !done; i++ { - tools := gollm.NewToolBox(fnDone) - - if numberOfReads < a.MaxReads || a.MaxReads <= 0 { - tools = tools.WithFunction(*fnReadSearchResult) + readSource := func(ctx context.Context, src duckduckgo.Result) (shared.Knowledge, error) { + r := reader.Agent{ + Model: a.Model, + ContextualInformation: a.ContextualInformation, } - if numberOfNextResults < a.MaxNextResults || a.MaxNextResults <= 0 { - tools = tools.WithFunction(*fnNextResults) + u, err := url.Parse(src.URL) + if err != nil { + return shared.Knowledge{}, err } - var req = gollm.Request{ - Toolbox: tools, + slog.Info("reading search result", "url", u) + response, err := r.Read(ctx, question, u) + if err != nil { + return shared.Knowledge{}, err } - req.Messages = append(req.Messages, gollm.Message{ - Role: gollm.RoleSystem, - Text: `You are searching DuckDuckGo for the answer to the question that will be posed by the user. The results will be provided in system messages in the format of: #. "https://url.here" - "Title of Page" - "Description here". For instance: + return response, nil + } + + tools := gollm.NewToolBox(fnReadSearchResult) + var req = gollm.Request{ + Toolbox: tools, + } + + req.Messages = append(req.Messages, gollm.Message{ + Role: gollm.RoleSystem, + Text: `You are searching DuckDuckGo for the answer to the question that will be posed by the user. The search results will be provided in system messages in the format of: #. "https://url.here" - "Title of Page" - "Description here". For instance: 1. "https://example.com" - "Example Title" - "This is an example description." 2. "https://example2.com" - "Example Title 2" - "This is an example description 2."`, - }) + }) + if a.MaxReads == 0 { + a.MaxReads = 100 + } + req.Messages = append(req.Messages, gollm.Message{ + Role: gollm.RoleSystem, + Text: fmt.Sprintf(`You can read a search result by using the function "read_search_result" with the # of the page to read, it will attempt to read the page, and then an LLM will read the page and see if it answers the question. +can call read_search_result multiple times, up to %d times. All sources you read will be evaulated to see if they answer the question in full or at least in part.`, a.MaxReads), + }) + + if len(a.ContextualInformation) > 0 { req.Messages = append(req.Messages, gollm.Message{ Role: gollm.RoleSystem, - Text: fmt.Sprintf(`You can read a search result by using the function "read_search_result" with the # of the page to read, -it will attempt to read the page, and then an LLM will read the page and see if it answers the question. The return value will be if there was an answer or not. You only have %d reads left of your original %d. Try to only pick high quality search results to read. -If you need to see more results from DuckDuckGo you can run the function "next_results" to get the next page of results. You only have %d next_results left of your original %d. -You can also use the function "done" to give up on reading the search results and finish executing, indicating you either have nothing left to answer or do not think any of the sources left will answer.`, max(a.MaxReads-numberOfReads, 0), a.MaxReads, max(a.MaxNextResults-numberOfNextResults, 0), a.MaxNextResults), + Text: "Some contextual information you should be aware of: " + strings.Join(a.ContextualInformation, "\n"), }) + } - if len(a.ContextualInformation) > 0 { - req.Messages = append(req.Messages, gollm.Message{ - Role: gollm.RoleSystem, - Text: "Some contextual information you should be aware of: " + strings.Join(a.ContextualInformation, "\n"), - }) + searches := "" + for i, r := range searchResults { + if i > 0 { + searches += "\n" } - searches := "" - for i, r := range searchResults { - if i > 0 { - searches += "\n" - } + searches += fmt.Sprintf("%d. %q - %q - %q", i+1, r.URL, r.Title, r.Description) + } - searches += fmt.Sprintf("%d. %q - %q - %q", i+1, r.URL, r.Title, r.Description) + req.Messages = append(req.Messages, gollm.Message{ + Role: gollm.RoleSystem, + Text: "Search results are:\n" + searches, + }) + + results, err := a.Model.ChatComplete(ctx, req) + if err != nil { + return knowledge, err + } + + if len(results.Choices) == 0 { + return knowledge, fmt.Errorf("no choices were returned") + } + + choice := results.Choices[0] + + // enforce the maximum number of reads + calls := choice.Calls + if len(calls) > a.MaxReads { + slog.Warn("too many calls, trimming to max", "len", len(calls), "max", a.MaxReads) + calls = calls[:a.MaxReads] + } + + _, err = tools.ExecuteCallbacks(gollm.NewContext(ctx, req, &choice, nil), choice.Calls, nil, nil) + if err != nil { + return knowledge, err + } + + close(toRead) + + // make sure there are no duplicates + var uniques = map[int]struct{}{} + + for i := range toRead { + uniques[i] = struct{}{} + } + + var sources []duckduckgo.Result + + for k := range uniques { + if k < 0 || k >= len(searchResults) { + slog.Warn("search result index out of range", "index", k, "len", len(searchResults)) + + continue + } + sources = append(sources, searchResults[k]) + } + + type result struct { + Knowledge shared.Knowledge + Err error + } + + var gainedKnowledge = make(chan result, len(sources)) + + wg := sync.WaitGroup{} + for _, v := range sources { + wg.Add(1) + go func() { + res, err := readSource(ctx, v) + slog.Info("read search result", "url", v.URL, "err", err) + gainedKnowledge <- result{Knowledge: res, Err: err} + wg.Done() + }() + } + + slog.Info("reading search results", "len", len(sources)) + wg.Wait() + + close(gainedKnowledge) + + slog.Info("done reading search results", "len", len(gainedKnowledge)) + + for r := range gainedKnowledge { + if r.Err != nil { + slog.Info("error reading search result", "err", r.Err) + continue } - req.Messages = append(req.Messages, gollm.Message{ - Role: gollm.RoleSystem, - Text: "Search results are:\n" + searches, - }) - - results, err := a.Model.ChatComplete(ctx, req) - if err != nil { - return knowledge, err - } - - if len(results.Choices) == 0 { - break - } - - choice := results.Choices[0] - - _, err = tools.ExecuteCallbacks(gollm.NewContext(ctx, req, &choice, nil), choice.Calls, nil, nil) - if err != nil { - return knowledge, err - } + knowledge.Knowledge = append(knowledge.Knowledge, r.Knowledge.Knowledge...) + knowledge.RemainingQuestions = append(knowledge.RemainingQuestions, r.Knowledge.RemainingQuestions...) } if a.OnDone != nil {