Refactor search agent for concurrency and simplify flow

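Removed redundant fields and callbacks from the search agent (OnGoingToNextPage, OnReadingSearchResult, OnFinishedReadingSearchResult, and MaxNextResults) while introducing concurrent processing for reading search results. Updated the logic to enhance readability and modularity: the multi-round tool loop is replaced by a single model call that selects which results to read, reads are capped at MaxReads, and the selected results are then read in parallel, which streamlines interaction with the search results. Adjusted dependencies (the go-llm version) and related usage in the calling command to align with the refactored design.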
This commit is contained in:
Steve Dudenhoeffer 2025-03-21 18:12:19 -04:00
parent 693ac4e6a7
commit 5407c1a7cc
3 changed files with 136 additions and 188 deletions

View File

@ -13,8 +13,6 @@ import (
"gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared" "gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared"
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
"gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/searcher" "gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/searcher"
"github.com/joho/godotenv" "github.com/joho/godotenv"
@ -106,24 +104,12 @@ func main() {
search := searcher.Agent{ search := searcher.Agent{
Model: m, Model: m,
OnGoingToNextPage: func(ctx context.Context) error {
slog.Info("going to next page")
return nil
},
OnReadingSearchResult: func(ctx context.Context, sr duckduckgo.Result) (any, error) {
slog.Info("reading search result", "url", sr.URL, "title", sr.Title, "description", sr.Description)
return nil, nil
},
OnFinishedReadingSearchResult: func(ctx context.Context, sr duckduckgo.Result, newKnowledge []string, err error, onReadingResult any) error {
slog.Info("finished reading search result", "err", err, "newKnowledge", newKnowledge)
return nil
},
OnDone: func(ctx context.Context, knowledge shared.Knowledge) error { OnDone: func(ctx context.Context, knowledge shared.Knowledge) error {
slog.Info("done", "knowledge", knowledge) slog.Info("done", "knowledge", knowledge)
return nil return nil
}, },
MaxReads: 20, MaxReads: 20,
MaxNextResults: 10,
} }
processor := knowledge2.KnowledgeProcessor{Model: m} processor := knowledge2.KnowledgeProcessor{Model: m}
@ -135,7 +121,7 @@ func main() {
slog.Info("knowledge", "knowledge", knowledge) slog.Info("knowledge", "knowledge", knowledge)
sum, err := processor.ProcessKnowledge(ctx, knowledge) sum, err := processor.Process(ctx, knowledge)
fmt.Println(sum) fmt.Println(sum)
return nil return nil

2
go.mod
View File

@ -8,7 +8,7 @@ replace github.com/rocketlaunchr/google-search => github.com/chrisjoyce911/googl
require ( require (
gitea.stevedudenhoeffer.com/steve/go-extractor v0.0.0-20250318064250-39453288ce2a gitea.stevedudenhoeffer.com/steve/go-extractor v0.0.0-20250318064250-39453288ce2a
gitea.stevedudenhoeffer.com/steve/go-llm v0.0.0-20250318074538-52533238d385 gitea.stevedudenhoeffer.com/steve/go-llm v0.0.0-20250321150932-5ba42056adfc
github.com/Edw590/go-wolfram v0.0.0-20241010091529-fb9031908c5d github.com/Edw590/go-wolfram v0.0.0-20241010091529-fb9031908c5d
github.com/advancedlogic/GoOse v0.0.0-20231203033844-ae6b36caf275 github.com/advancedlogic/GoOse v0.0.0-20231203033844-ae6b36caf275
github.com/davecgh/go-spew v1.1.1 github.com/davecgh/go-spew v1.1.1

View File

@ -6,6 +6,8 @@ import (
"log/slog" "log/slog"
"net/url" "net/url"
"strings" "strings"
"sync"
"time"
"gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/reader" "gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/reader"
"gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared" "gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared"
@ -14,9 +16,6 @@ import (
gollm "gitea.stevedudenhoeffer.com/steve/go-llm" gollm "gitea.stevedudenhoeffer.com/steve/go-llm"
) )
// kMaxRuns is the maximum number of calls into the LLM this agent will make.
const kMaxRuns = 30
type Result struct { type Result struct {
// Answer is the answer to the question that was asked. // Answer is the answer to the question that was asked.
Answer string Answer string
@ -32,29 +31,14 @@ type Agent struct {
// Model is the chat completion model to use // Model is the chat completion model to use
Model gollm.ChatCompletion Model gollm.ChatCompletion
// OnGoingToNextPage is called when the agent is going to the next page
OnGoingToNextPage func(ctx context.Context) error
// OnReadingSearchResult is called when the agent is reading a search result
// url is the URL of the search result that is being read.
// The return value is any data that you want to pass to OnFinishedReadingSearchResult.
OnReadingSearchResult func(ctx context.Context, searchResult duckduckgo.Result) (any, error)
// OnFinishedReadingSearchResult is called when the agent is finished reading a search result.
// newKnowledge is the knowledge that was gained from reading the search result.
// err is any error that occurred while reading the search result.
// onReadingResult is the result of the OnReadingSearchResult function from the same search result.
OnFinishedReadingSearchResult func(ctx context.Context, searchResult duckduckgo.Result, newKnowledge []string, err error, onReadingResult any) error
OnDone func(ctx context.Context, knowledge shared.Knowledge) error OnDone func(ctx context.Context, knowledge shared.Knowledge) error
// MaxReads is the maximum number of pages that can be read by the agent. Unlimited if <= 0. // MaxReads is the maximum number of pages that can be read by the agent. Unlimited if <= 0.
MaxReads int MaxReads int
// MaxNextResults is the maximum number of times that the next_results function can be called. Unlimited if <= 0.
MaxNextResults int
ContextualInformation []string ContextualInformation []string
AllowConcurrent bool
} }
// Search will search duckduckgo for the given question, and then read the results to figure out the answer. // Search will search duckduckgo for the given question, and then read the results to figure out the answer.
@ -67,7 +51,6 @@ func (a Agent) Search(ctx context.Context, searchQuery string, question string)
OriginalQuestions: []string{question}, OriginalQuestions: []string{question},
RemainingQuestions: []string{question}, RemainingQuestions: []string{question},
} }
var done = false
browser, ok := ctx.Value("browser").(extractor.Browser) browser, ok := ctx.Value("browser").(extractor.Browser)
if !ok { if !ok {
@ -87,15 +70,11 @@ func (a Agent) Search(ctx context.Context, searchQuery string, question string)
} }
page, err := cfg.OpenSearch(ctx, browser, searchQuery) page, err := cfg.OpenSearch(ctx, browser, searchQuery)
defer deferClose(page)
if err != nil { if err != nil {
return knowledge, err return knowledge, err
} }
defer deferClose(page)
var numberOfReads int
var numberOfNextResults int
var searchResults []duckduckgo.Result var searchResults []duckduckgo.Result
// filterResults will remove any search results that are in oldSearchResults, or are empty // filterResults will remove any search results that are in oldSearchResults, or are empty
@ -112,177 +91,160 @@ func (a Agent) Search(ctx context.Context, searchQuery string, question string)
return res return res
} }
_ = page.LoadMore()
time.Sleep(2 * time.Second)
searchResults = filterResults(page.GetResults()) searchResults = filterResults(page.GetResults())
fnNextResults := gollm.NewFunction( var toRead = make(chan int, a.MaxReads)
"next_results",
"get the next page of search results",
func(c *gollm.Context,
arg struct {
Ignored string `description:"This is ignored, only included for API requirements."`
}) (string, error) {
if numberOfNextResults >= a.MaxNextResults && a.MaxNextResults > 0 {
return "Max next results reached", nil
}
numberOfNextResults++ fnReadSearchResult := gollm.NewFunction("read",
"read the search result and see if it answers the question",
searchResults = append(searchResults, filterResults(page.GetResults())...)
// clamp it to the 30 most recent results
if len(searchResults) > 30 {
// remove the first len(searchResults) - 30 elements
searchResults = searchResults[len(searchResults)-30:]
}
return "Got more search results", nil
},
)
fnReadSearchResult := gollm.NewFunction(
"read",
"go to the next page of search results",
func(c *gollm.Context, arg struct { func(c *gollm.Context, arg struct {
Num int `description:"The # of the search result to read."` Num int `description:"The # of the search result to read."`
}) (string, error) { }) (string, error) {
if numberOfReads >= a.MaxReads && a.MaxReads > 0 { toRead <- arg.Num - 1
return "Max reads reached", nil
}
numberOfReads++
r := reader.Agent{
Model: a.Model,
ContextualInformation: a.ContextualInformation,
}
// num is 1 based, we need 0 based
num := arg.Num - 1
// now ensure bounds are good
if num < 0 || num >= len(searchResults) {
return "", fmt.Errorf("search result %d is out of bounds", num)
}
sr := searchResults[num]
// remove that search result from the list
searchResults = append(searchResults[:num], searchResults[num+1:]...)
u, err := url.Parse(sr.URL)
if err != nil {
return "", err
}
var onReadingResult any
if a.OnReadingSearchResult != nil {
onReadingResult, err = a.OnReadingSearchResult(ctx, sr)
if err != nil {
return "", err
}
}
response, err := r.Read(c, question, u)
if err != nil {
return "", err
}
if a.OnFinishedReadingSearchResult != nil {
var newKnowledge []string
for _, k := range response.Knowledge {
newKnowledge = append(newKnowledge, k.Info)
}
err = a.OnFinishedReadingSearchResult(ctx, sr, newKnowledge, err, onReadingResult)
if err != nil {
return "", err
}
}
slog.Info("read finished", "url", u, "knowledge gained", response.Knowledge, "remaining", response.RemainingQuestions)
knowledge.Knowledge = append(knowledge.Knowledge, response.Knowledge...)
knowledge.RemainingQuestions = response.RemainingQuestions
return "ok", nil return "ok", nil
}) })
fnDone := gollm.NewFunction( readSource := func(ctx context.Context, src duckduckgo.Result) (shared.Knowledge, error) {
"done", r := reader.Agent{
"finish reading search results", Model: a.Model,
func(c *gollm.Context, arg struct { ContextualInformation: a.ContextualInformation,
Ignored string `description:"This is ignored, only included for API requirements."`
}) (string, error) {
done = true
return "ok", nil
})
for i := 0; i < kMaxRuns && !done; i++ {
tools := gollm.NewToolBox(fnDone)
if numberOfReads < a.MaxReads || a.MaxReads <= 0 {
tools = tools.WithFunction(*fnReadSearchResult)
} }
if numberOfNextResults < a.MaxNextResults || a.MaxNextResults <= 0 { u, err := url.Parse(src.URL)
tools = tools.WithFunction(*fnNextResults) if err != nil {
return shared.Knowledge{}, err
} }
var req = gollm.Request{ slog.Info("reading search result", "url", u)
Toolbox: tools, response, err := r.Read(ctx, question, u)
if err != nil {
return shared.Knowledge{}, err
} }
req.Messages = append(req.Messages, gollm.Message{ return response, nil
Role: gollm.RoleSystem, }
Text: `You are searching DuckDuckGo for the answer to the question that will be posed by the user. The results will be provided in system messages in the format of: #. "https://url.here" - "Title of Page" - "Description here". For instance:
tools := gollm.NewToolBox(fnReadSearchResult)
var req = gollm.Request{
Toolbox: tools,
}
req.Messages = append(req.Messages, gollm.Message{
Role: gollm.RoleSystem,
Text: `You are searching DuckDuckGo for the answer to the question that will be posed by the user. The search results will be provided in system messages in the format of: #. "https://url.here" - "Title of Page" - "Description here". For instance:
1. "https://example.com" - "Example Title" - "This is an example description." 1. "https://example.com" - "Example Title" - "This is an example description."
2. "https://example2.com" - "Example Title 2" - "This is an example description 2."`, 2. "https://example2.com" - "Example Title 2" - "This is an example description 2."`,
}) })
if a.MaxReads == 0 {
a.MaxReads = 100
}
req.Messages = append(req.Messages, gollm.Message{
Role: gollm.RoleSystem,
Text: fmt.Sprintf(`You can read a search result by using the function "read_search_result" with the # of the page to read, it will attempt to read the page, and then an LLM will read the page and see if it answers the question.
can call read_search_result multiple times, up to %d times. All sources you read will be evaulated to see if they answer the question in full or at least in part.`, a.MaxReads),
})
if len(a.ContextualInformation) > 0 {
req.Messages = append(req.Messages, gollm.Message{ req.Messages = append(req.Messages, gollm.Message{
Role: gollm.RoleSystem, Role: gollm.RoleSystem,
Text: fmt.Sprintf(`You can read a search result by using the function "read_search_result" with the # of the page to read, Text: "Some contextual information you should be aware of: " + strings.Join(a.ContextualInformation, "\n"),
it will attempt to read the page, and then an LLM will read the page and see if it answers the question. The return value will be if there was an answer or not. You only have %d reads left of your original %d. Try to only pick high quality search results to read.
If you need to see more results from DuckDuckGo you can run the function "next_results" to get the next page of results. You only have %d next_results left of your original %d.
You can also use the function "done" to give up on reading the search results and finish executing, indicating you either have nothing left to answer or do not think any of the sources left will answer.`, max(a.MaxReads-numberOfReads, 0), a.MaxReads, max(a.MaxNextResults-numberOfNextResults, 0), a.MaxNextResults),
}) })
}
if len(a.ContextualInformation) > 0 { searches := ""
req.Messages = append(req.Messages, gollm.Message{ for i, r := range searchResults {
Role: gollm.RoleSystem, if i > 0 {
Text: "Some contextual information you should be aware of: " + strings.Join(a.ContextualInformation, "\n"), searches += "\n"
})
} }
searches := "" searches += fmt.Sprintf("%d. %q - %q - %q", i+1, r.URL, r.Title, r.Description)
for i, r := range searchResults { }
if i > 0 {
searches += "\n"
}
searches += fmt.Sprintf("%d. %q - %q - %q", i+1, r.URL, r.Title, r.Description) req.Messages = append(req.Messages, gollm.Message{
Role: gollm.RoleSystem,
Text: "Search results are:\n" + searches,
})
results, err := a.Model.ChatComplete(ctx, req)
if err != nil {
return knowledge, err
}
if len(results.Choices) == 0 {
return knowledge, fmt.Errorf("no choices were returned")
}
choice := results.Choices[0]
// enforce the maximum number of reads
calls := choice.Calls
if len(calls) > a.MaxReads {
slog.Warn("too many calls, trimming to max", "len", len(calls), "max", a.MaxReads)
calls = calls[:a.MaxReads]
}
_, err = tools.ExecuteCallbacks(gollm.NewContext(ctx, req, &choice, nil), choice.Calls, nil, nil)
if err != nil {
return knowledge, err
}
close(toRead)
// make sure there are no duplicates
var uniques = map[int]struct{}{}
for i := range toRead {
uniques[i] = struct{}{}
}
var sources []duckduckgo.Result
for k := range uniques {
if k < 0 || k >= len(searchResults) {
slog.Warn("search result index out of range", "index", k, "len", len(searchResults))
continue
}
sources = append(sources, searchResults[k])
}
type result struct {
Knowledge shared.Knowledge
Err error
}
var gainedKnowledge = make(chan result, len(sources))
wg := sync.WaitGroup{}
for _, v := range sources {
wg.Add(1)
go func() {
res, err := readSource(ctx, v)
slog.Info("read search result", "url", v.URL, "err", err)
gainedKnowledge <- result{Knowledge: res, Err: err}
wg.Done()
}()
}
slog.Info("reading search results", "len", len(sources))
wg.Wait()
close(gainedKnowledge)
slog.Info("done reading search results", "len", len(gainedKnowledge))
for r := range gainedKnowledge {
if r.Err != nil {
slog.Info("error reading search result", "err", r.Err)
continue
} }
req.Messages = append(req.Messages, gollm.Message{ knowledge.Knowledge = append(knowledge.Knowledge, r.Knowledge.Knowledge...)
Role: gollm.RoleSystem, knowledge.RemainingQuestions = append(knowledge.RemainingQuestions, r.Knowledge.RemainingQuestions...)
Text: "Search results are:\n" + searches,
})
results, err := a.Model.ChatComplete(ctx, req)
if err != nil {
return knowledge, err
}
if len(results.Choices) == 0 {
break
}
choice := results.Choices[0]
_, err = tools.ExecuteCallbacks(gollm.NewContext(ctx, req, &choice, nil), choice.Calls, nil, nil)
if err != nil {
return knowledge, err
}
} }
if a.OnDone != nil { if a.OnDone != nil {