Consolidated reused code into the agents package
pkg/agents/search.go | 216 lines (new file)
@@ -0,0 +1,216 @@
package agents

import (
	"context"
	"fmt"
	"io"
	"log/slog"
	"net/url"
	"slices"
	"strings"
	"sync"
	"time"

	"gitea.stevedudenhoeffer.com/steve/go-extractor"
	"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
	gollm "gitea.stevedudenhoeffer.com/steve/go-llm"
)

func deferClose(c io.Closer) {
	if c != nil {
		_ = c.Close()
	}
}
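
// SearchTool pairs a tool name and description (both shown to the LLM) with a
// callback that analyzes a single search-result URL and returns any Knowledge
// it extracted. An illustrative sketch (the callback body and the names below
// are hypothetical, not part of this package):
//
//	headlines := SearchTool{
//		Name:        "ReadHeadlines",
//		Description: "Read only the headlines of the page.",
//		Function: func(ctx context.Context, src *url.URL, questions []string) (Knowledge, error) {
//			// fetch src and extract whatever helps answer the questions ...
//			return Knowledge{}, nil
//		},
//	}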
type SearchTool struct {
	Name        string
	Description string
	Function    func(ctx context.Context, src *url.URL, questions []string) (Knowledge, error)
}

// SearchAndUseTools searches DuckDuckGo for the given query and then asks the LLM to select search results to
// analyze. The LLM is given the provided tools to analyze the selected results and is then asked to determine
// whether the search results answer the questions.
// If the context contains a "browser" key holding an extractor.Browser, that browser is used for the search;
// otherwise a new one is created for the life of this search and closed afterwards.
// searchQuery is the query to search for.
// questions is the list of questions the LLM is trying to answer with the search results.
// loops is the number of times to ask the LLM to analyze results while questions remain before giving up.
// allowConcurrent controls whether the LLM's tool calls are executed in parallel.
// maxReads defaults to 100 when it is 0.
// tools is the list of tools the LLM may use to read the search results. Any knowledge gained from these tools
// will be combined and returned.
// messages are appended to every request sent to the LLM; both string and gollm.Message values are accepted.
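//
// A caller that already has a browser open can supply it through the context. Sketch only; the names
// b, agent, questions, and tools below are placeholders:
//
//	ctx = context.WithValue(ctx, "browser", b)
//	k, err := agent.SearchAndUseTools(ctx, "site outage today", questions, 2, false, 10, tools)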
func (a Agent) SearchAndUseTools(ctx context.Context, searchQuery string, questions []string, loops int, allowConcurrent bool, maxReads int, tools []SearchTool, messages ...any) (Knowledge, error) {
	var knowledge = Knowledge{
		OriginalQuestions:  questions,
		RemainingQuestions: questions,
	}

	browser, ok := ctx.Value("browser").(extractor.Browser)
	if !ok {
		b, err := extractor.NewPlayWrightBrowser(extractor.PlayWrightBrowserOptions{})
		if err != nil {
			return knowledge, err
		}

		// close the newly created browser once this search is done
		defer deferClose(b)
		ctx = context.WithValue(ctx, "browser", b)
		browser = b
	}

	cfg := duckduckgo.Config{
		SafeSearch: duckduckgo.SafeSearchOff,
		Region:     "us-en",
	}

	page, err := cfg.OpenSearch(ctx, browser, searchQuery)
	defer deferClose(page)
	if err != nil {
		return knowledge, err
	}

	var searchResults []duckduckgo.Result

	// filterResults removes any search results with an empty URL
	filterResults := func(in []duckduckgo.Result) []duckduckgo.Result {
		var res []duckduckgo.Result
		for _, r := range in {
			if r.URL == "" {
				continue
			}

			res = append(res, r)
		}

		return res
	}

	if maxReads == 0 {
		maxReads = 100
	}

	var lock sync.Mutex
	var analyzed []int
	var converted []*gollm.Function

	for _, t := range tools {
		t := t // capture the loop variable for the closure below
		fn := gollm.NewFunction(t.Name, t.Description,
			func(c *gollm.Context, arg struct {
				Num int `description:"The # of search result to analyze."`
			}) (any, error) {
				i := arg.Num - 1

				defer func() {
					lock.Lock()
					defer lock.Unlock()
					analyzed = append(analyzed, i)
				}()

				if i < 0 || i >= len(searchResults) {
					return nil, fmt.Errorf("index out of range: expect 1-%d", len(searchResults))
				}

				u, err := url.Parse(searchResults[i].URL)
				if err != nil {
					return nil, fmt.Errorf("error parsing url: %w", err)
				}

				return t.Function(c.Context, u, questions)
			})
		converted = append(converted, fn)
	}

	for i := 0; i < loops; i++ {
		// remove any search results that have already been analyzed. To keep the remaining
		// indexes valid, deduplicate the analyzed indexes, sort them, and remove them from
		// the end of the slice backwards.

		// first remove any duplicates
		var unique = map[int]struct{}{}
		for _, v := range analyzed {
			unique[v] = struct{}{}
		}

		analyzed = analyzed[:0]
		for k := range unique {
			analyzed = append(analyzed, k)
		}

		slices.Sort(analyzed)
		for j := len(analyzed) - 1; j >= 0; j-- {
			searchResults = append(searchResults[:analyzed[j]], searchResults[analyzed[j]+1:]...)
		}

		// reset the analyzed list for this round
		analyzed = analyzed[:0]

		_ = page.LoadMore()
		time.Sleep(2 * time.Second)

		searchResults = filterResults(page.GetResults())

		a = a.WithSystemPrompt(`You are searching DuckDuckGo for the answer to the question that will be posed by the user. The search results will be provided in system messages in the format of: #. "https://url.here" - "Title of Page" - "Description here". For instance:
1. "https://example.com" - "Example Title" - "This is an example description."
2. "https://example2.com" - "Example Title 2" - "This is an example description 2."

Use appropriate tools to analyze the search results and determine if they answer the question.`).
			WithSystemPromptSuffix(``).
			WithToolbox(gollm.NewToolBox(converted...))

		var searches = make([]string, len(searchResults))
		for i, r := range searchResults {
			searches[i] = fmt.Sprintf("%d. %q - %q - %q", i+1, r.URL, r.Title, r.Description)
		}

		if len(searches) > 0 {
			messages = append(messages, "The search results are:\n"+strings.Join(searches, "\n"))
		}

		var results CallAndExecuteResults
		if allowConcurrent {
			results, err = a.CallAndExecuteParallel(ctx, messages...)
		} else {
			results, err = a.CallAndExecute(ctx, messages...)
		}

		if err != nil {
			return knowledge, fmt.Errorf("error executing search function: %w", err)
		}

		var learned []Knowledge
		for _, r := range results.CallResults {
			if r.Error != nil {
				slog.Error("error executing search function", "error", r.Error)
				continue
			}

			if k, ok := r.Result.(Knowledge); ok {
				learned = append(learned, k)
			} else {
				slog.Error("result is not knowledge", "result", r.Result)
			}
		}

		knowledge, err = a.KnowledgeIntegrate(ctx, knowledge, learned...)
		if err != nil {
			return knowledge, fmt.Errorf("error integrating knowledge: %w", err)
		}

		if len(knowledge.RemainingQuestions) == 0 {
			return knowledge, nil
		}
	}

	return knowledge, nil
}

func (a Agent) SearchAndRead(ctx context.Context, searchQuery string, questions []string, allowConcurrent bool, maxReads int) (Knowledge, error) {
	return a.SearchAndUseTools(ctx, searchQuery, questions, 2, allowConcurrent, maxReads, []SearchTool{
		{
			Name:        "ReadPage",
			Description: fmt.Sprintf("Read the search result and see if it answers the question. Try to avoid using this on low quality or spammy sites. You can use this function %d times, but do not call it multiple times on the same result.", maxReads),
			Function:    a.ReadPage,
		},
	})
}
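
// Usage sketch: how a caller might drive SearchAndRead. The agent value, query, questions, and
// error handling below are placeholders, not part of this package:
//
//	questions := []string{"When was the Go programming language first released?"}
//	k, err := agent.SearchAndRead(ctx, "Go programming language release date", questions, false, 5)
//	if err != nil {
//		// handle the error
//	}
//	_ = k // k holds the integrated Knowledge, including any remaining questions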