Introduce multiple agents, tools, and utilities for processing, extracting, and answering user-provided questions using LLMs and external data. Key features include knowledge processing, question splitting, search term generation, and contextual knowledge handling.
654 lines
16 KiB
Go
654 lines
16 KiB
Go
package answer
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"log/slog"
|
|
"net/url"
|
|
"slices"
|
|
"strings"
|
|
"time"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/answer/pkg/agent"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/answer/pkg/search"
|
|
|
|
gollm "gitea.stevedudenhoeffer.com/steve/go-llm"
|
|
)
|
|
|
|
// Hard limits that bound how much work a single search may perform.
//
// The system prompts in functionSearch and functionSearch2 state the read and
// next_page limits as literal numbers (10 and 3); keep those prompts in sync
// when changing the values below.
const (
	// kMaxLoops caps the number of model round-trips made by the search loops.
	kMaxLoops = 10

	// kMaxReads caps how many pages the "read" tool may fetch per search.
	kMaxReads = 10

	// kMaxLoadMore caps how many times the "next_page" tool may load more
	// results per search.
	kMaxLoadMore = 3
)
|
|
|
|
// searchResults is the outcome of reading one page for a question.
//
// Url is the address of the page the answer came from; internalSearch uses
// the sentinel value "not-found" when nothing usable was found. Answer holds
// the answer text, or a short explanation when Url is "not-found".
type searchResults struct {
	Url    string `json:"url"`
	Answer string `json:"answer"`
}

// String encodes the result as a JSON object of the form
// {"url": "...", "answer": "..."}.
//
// Unlike fmt.Stringer it also returns an error, which is whatever
// json.Marshal reported; the string is empty in that case.
func (s searchResults) String() (string, error) {
	encoded, err := json.Marshal(s)
	if err == nil {
		return string(encoded), nil
	}

	return "", err
}
|
|
|
|
func pickResult(ctx *gollm.Context, results []search.Result, q Question) (*search.Result, error) {
|
|
// if there's only one result, return it
|
|
if len(results) == 1 {
|
|
return &results[0], nil
|
|
}
|
|
|
|
// if there are no results, return nil
|
|
if len(results) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
var pick *search.Result
|
|
var refused bool
|
|
// finally, if there are multiple results then ask the LLM to pick one to read next
|
|
fnPick := gollm.NewFunction(
|
|
"pick",
|
|
"The search result to read next.",
|
|
func(ctx *gollm.Context, args struct {
|
|
URL string `description:"the url to read next"`
|
|
}) (string, error) {
|
|
for _, r := range results {
|
|
if r.URL == args.URL {
|
|
pick = &r
|
|
break
|
|
}
|
|
}
|
|
|
|
return "", nil
|
|
})
|
|
|
|
fnNoPick := gollm.NewFunction(
|
|
"no_pick",
|
|
"Indicate that there are no results worth reading.",
|
|
func(ctx *gollm.Context, args struct {
|
|
Ignored string `description:"ignored, just here to make sure the function is called. Fill with anything."`
|
|
}) (string, error) {
|
|
refused = true
|
|
return "", nil
|
|
})
|
|
|
|
req := gollm.Request{
|
|
Messages: []gollm.Message{
|
|
{
|
|
Role: gollm.RoleSystem,
|
|
Text: `You are being given results from a web search. Please select the result you would like to read next to answer the question. Try to pick the most reputable and relevant result.
|
|
The results will be in the JSON format of: {"Url": "https://url.here", "Title": "Title Of Search", "Description": "description here"}`,
|
|
},
|
|
{
|
|
Role: gollm.RoleSystem,
|
|
Text: "The question you are trying to answer is: " + q.Question,
|
|
},
|
|
},
|
|
Toolbox: gollm.NewToolBox(fnPick, fnNoPick),
|
|
}
|
|
|
|
for _, r := range results {
|
|
b, _ := json.Marshal(r)
|
|
req.Messages = append(req.Messages, gollm.Message{
|
|
Role: gollm.RoleUser,
|
|
Text: string(b),
|
|
})
|
|
}
|
|
|
|
res, err := q.Model.ChatComplete(ctx, req)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if len(res.Choices) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
if len(res.Choices[0].Calls) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
_, _ = req.Toolbox.Execute(ctx, res.Choices[0].Calls[0])
|
|
|
|
if refused {
|
|
return nil, nil
|
|
}
|
|
|
|
return pick, nil
|
|
}
|
|
|
|
func internalSearch(ctx *gollm.Context, q Question, searchTerm string) (searchResults, error) {
|
|
slog.Info("searching", "search", searchTerm, "question", q)
|
|
results, err := q.Search.Search(ctx, searchTerm)
|
|
if err != nil {
|
|
return searchResults{}, err
|
|
}
|
|
|
|
if len(results) == 0 {
|
|
return searchResults{Url: "not-found", Answer: "no search results found"}, nil
|
|
}
|
|
|
|
for len(results) > 0 {
|
|
var pick *search.Result
|
|
if len(results) == 1 {
|
|
pick = &results[0]
|
|
results = results[1:]
|
|
} else {
|
|
var err error
|
|
pick, err = pickResult(ctx, results, q)
|
|
|
|
slog.Info("picked result", "result", pick, "error", err)
|
|
if err != nil {
|
|
return searchResults{}, err
|
|
}
|
|
|
|
if pick == nil {
|
|
break
|
|
}
|
|
}
|
|
|
|
trimmed := strings.TrimSpace(pick.URL)
|
|
if trimmed == "" {
|
|
|
|
}
|
|
|
|
slog.Info("extracting article", "url", trimmed)
|
|
u, err := url.Parse(trimmed)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
a, err := extractArticle(ctx, q.Cache, u)
|
|
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
slog.Info("extracted article", "url", a.URL, "title", a.Title, "body", a.Body)
|
|
|
|
if a.Title != "" && a.Body != "" {
|
|
answer, err := doesTextAnswerQuestion(ctx, q, a.Body)
|
|
|
|
if err != nil {
|
|
slog.Error("error checking if text answers question", "question", q.Question, "error", err)
|
|
continue
|
|
}
|
|
|
|
if answer != "" {
|
|
return searchResults{Url: u.String(), Answer: answer}, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
return searchResults{Url: "not-found", Answer: "no searched results answered"}, nil
|
|
}
|
|
|
|
// searchResults2 is the final outcome of a tool-driven search: the answer the
// model settled on and the source URLs it reported for that answer.
type searchResults2 struct {
	Answer string   `json:"answer"`
	Urls   []string `json:"urls"`
}

// String encodes the result as a JSON object of the form
// {"answer": "...", "urls": [...]}. A nil Urls slice is encoded as null.
//
// Unlike fmt.Stringer it also returns an error, which is whatever
// json.Marshal reported; the string is empty in that case.
func (r searchResults2) String() (string, error) {
	encoded, err := json.Marshal(r)
	if err == nil {
		return string(encoded), nil
	}

	return "", err
}
|
|
func functionSearch2(ctx *gollm.Context, q Question, searchTerm string) (searchResults2, error) {
|
|
var res searchResults2
|
|
browser, ok := ctx.Value("browser").(extractor.Browser)
|
|
if !ok {
|
|
return searchResults2{}, fmt.Errorf("browser not found in context")
|
|
}
|
|
|
|
cfg := duckduckgo.Config{
|
|
SafeSearch: duckduckgo.SafeSearchOff,
|
|
Region: "us-en",
|
|
}
|
|
|
|
page, err := cfg.OpenSearch(ctx, browser, searchTerm)
|
|
defer deferClose(page)
|
|
if err != nil {
|
|
return searchResults2{}, fmt.Errorf("failed to open search page: %w", err)
|
|
}
|
|
|
|
var totalNextPage int
|
|
var totalRead int
|
|
|
|
// oldResults are all the old results from the previous pages, so that when we load more we can filter out
|
|
// the old results
|
|
var oldResults []duckduckgo.Result
|
|
|
|
filterResults := func(results []duckduckgo.Result) []duckduckgo.Result {
|
|
var res []duckduckgo.Result
|
|
for _, r := range results {
|
|
if r.Title == "" || r.Description == "" {
|
|
continue
|
|
}
|
|
|
|
if slices.Contains(oldResults, r) {
|
|
continue
|
|
}
|
|
|
|
res = append(res, r)
|
|
}
|
|
|
|
return res
|
|
}
|
|
|
|
a := agent.NewAgent(gollm.Request{
|
|
Messages: []gollm.Message{
|
|
{
|
|
Role: gollm.RoleSystem,
|
|
Text: `You are trying to answer a question by reading pages from a search engine.
|
|
Use 'read' to read a page. You can only read 10 pages total, so try to only pick high quality pages. Results of a read will be in the format of {"url": "https://url.here", "answer": "answer here"}.
|
|
Additionally, you can use 'next_page' to load more results. You can only use next_page 3 times total.
|
|
You can read multiple pages at once, or read one page and continue to the next page if you need more information.
|
|
But if you are confident in your answer, you can use 'answer' to provide the answer.
|
|
Or you can use 'give_up' to indicate that you cannot find an answer and give up.`,
|
|
},
|
|
{
|
|
Role: gollm.RoleSystem,
|
|
Text: "The question you are trying to answer is: " + q.Question,
|
|
},
|
|
{
|
|
Role: gollm.RoleSystem,
|
|
Text: "The search terms you used were: " + searchTerm,
|
|
},
|
|
{
|
|
Role: gollm.RoleSystem,
|
|
Text: `The search results will be provided by the user in json format of: {"url": "https://url.here", "title": "Title Of Page", "description": "description here"}`,
|
|
},
|
|
},
|
|
})
|
|
|
|
a.Model = q.Model
|
|
|
|
var giveup bool
|
|
|
|
addMessages := func(results []duckduckgo.Result) {
|
|
type searchResults struct {
|
|
Url string `json:"url"`
|
|
Title string `json:"title"`
|
|
Desc string `json:"description"`
|
|
}
|
|
for _, r := range results {
|
|
b, _ := json.Marshal(&searchResults{Url: r.URL, Title: r.Title, Desc: r.Description})
|
|
a.AddMessage(gollm.Message{
|
|
Role: gollm.RoleUser,
|
|
Text: string(b),
|
|
})
|
|
}
|
|
}
|
|
|
|
fnRead := gollm.NewFunction(
|
|
"read",
|
|
`Read a page from the search results. The results will be in the JSON format of: {"url": "https://url.here", "answer": "answer here"}`,
|
|
func(ctx *gollm.Context, args struct {
|
|
URL string `description:"the url to read"`
|
|
}) (string, error) {
|
|
slog.Info("read", "url", args.URL)
|
|
if totalRead >= kMaxReads {
|
|
return "you have read the maximum number of pages", nil
|
|
}
|
|
|
|
totalRead += 1
|
|
|
|
u, err := url.Parse(args.URL)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to parse url: %w", err)
|
|
}
|
|
|
|
a, err := extractArticle(ctx, q.Cache, u)
|
|
slog.Info("extracted article", "url", a.URL, "title", a.Title, "body", a.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to extract article: %w", err)
|
|
}
|
|
|
|
if a.Title == "" || a.Body == "" {
|
|
return "couldn't read the page", nil
|
|
}
|
|
|
|
answer, err := doesTextAnswerQuestion(ctx, q, a.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to check if text answers question: %w", err)
|
|
}
|
|
|
|
var res = searchResults{
|
|
Url: u.String(),
|
|
Answer: answer,
|
|
}
|
|
|
|
return res.String()
|
|
})
|
|
|
|
fnNextPage := gollm.NewFunction(
|
|
"next_page",
|
|
"Load more results from the search engine.",
|
|
func(ctx *gollm.Context, args struct {
|
|
Ignored string `description:"ignored, just here to make sure the function is called. Fill with anything."`
|
|
}) (string, error) {
|
|
if totalNextPage >= kMaxLoadMore {
|
|
return "you have loaded the maximum number of pages", nil
|
|
}
|
|
|
|
totalNextPage += 1
|
|
|
|
err := page.LoadMore()
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to load more results: %w", err)
|
|
}
|
|
|
|
time.Sleep(4 * time.Second)
|
|
|
|
results := page.GetResults()
|
|
|
|
// only add the new results here...
|
|
filteredResults := filterResults(results)
|
|
oldResults = append(oldResults, filteredResults...)
|
|
addMessages(filteredResults)
|
|
return "ok", nil
|
|
})
|
|
|
|
fnAnswer := gollm.NewFunction(
|
|
"answer",
|
|
"Provide the answer to the question.",
|
|
func(ctx *gollm.Context, args struct {
|
|
Answer string `description:"the answer to the question"`
|
|
Sources []string `description:"the urls of sources used to find the answer"`
|
|
}) (string, error) {
|
|
res.Answer = args.Answer
|
|
res.Urls = args.Sources
|
|
giveup = true
|
|
return "ok", nil
|
|
})
|
|
|
|
fnGiveUp := gollm.NewFunction(
|
|
"give_up",
|
|
"Indicate that you cannot find an answer and give up.",
|
|
func(ctx *gollm.Context, args struct {
|
|
Ignored string `description:"ignored, just here to make sure the function is called. Fill with anything."`
|
|
}) (string, error) {
|
|
giveup = true
|
|
return "ok", nil
|
|
})
|
|
|
|
// do initial load of results
|
|
results := page.GetResults()
|
|
filteredResults := filterResults(results)
|
|
oldResults = append(oldResults, filteredResults...)
|
|
addMessages(filteredResults)
|
|
|
|
var i = 0
|
|
for ; i < kMaxLoops && !giveup; i++ {
|
|
// figure out my allowed tools, based on limits
|
|
var tools = []*gollm.Function{
|
|
fnAnswer,
|
|
fnGiveUp,
|
|
}
|
|
|
|
if totalRead < kMaxReads {
|
|
tools = append(tools, fnRead)
|
|
}
|
|
|
|
if totalNextPage < kMaxLoadMore {
|
|
tools = append(tools, fnNextPage)
|
|
}
|
|
|
|
a.ToolBox = gollm.NewToolBox(tools...)
|
|
|
|
err = a.Execute(ctx, gollm.Message{Role: gollm.RoleSystem, Text: "Now evaluate if the text answers the question, and use a function to either provide the answer or read more pages."})
|
|
if err != nil {
|
|
return searchResults2{}, fmt.Errorf("failed to run agent: %w", err)
|
|
}
|
|
}
|
|
|
|
if giveup {
|
|
return res, fmt.Errorf("gave up: no relevant results found")
|
|
}
|
|
|
|
if res.Answer == "" {
|
|
return res, fmt.Errorf("no answer found")
|
|
}
|
|
|
|
return res, nil
|
|
}
|
|
func functionSearch(ctx *gollm.Context, q Question, searchTerm string) (searchResults2, error) {
|
|
var res searchResults2
|
|
browser, ok := ctx.Value("browser").(extractor.Browser)
|
|
if !ok {
|
|
return searchResults2{}, fmt.Errorf("browser not found in context")
|
|
}
|
|
|
|
cfg := duckduckgo.Config{
|
|
SafeSearch: duckduckgo.SafeSearchOff,
|
|
Region: "us-en",
|
|
}
|
|
|
|
page, err := cfg.OpenSearch(ctx, browser, searchTerm)
|
|
defer deferClose(page)
|
|
if err != nil {
|
|
return searchResults2{}, fmt.Errorf("failed to open search page: %w", err)
|
|
}
|
|
|
|
var totalNextPage int
|
|
var totalRead int
|
|
|
|
// oldResults are all the old results from the previous pages, so that when we load more we can filter out
|
|
// the old results
|
|
var oldResults []duckduckgo.Result
|
|
|
|
filterResults := func(results []duckduckgo.Result) []duckduckgo.Result {
|
|
var res []duckduckgo.Result
|
|
for _, r := range results {
|
|
if r.Title == "" || r.Description == "" {
|
|
continue
|
|
}
|
|
|
|
if slices.Contains(oldResults, r) {
|
|
continue
|
|
}
|
|
|
|
res = append(res, r)
|
|
}
|
|
|
|
return res
|
|
}
|
|
|
|
var giveup bool
|
|
req := gollm.Request{
|
|
Messages: []gollm.Message{
|
|
{
|
|
Role: gollm.RoleSystem,
|
|
Text: `You are trying to answer a question by reading pages from a search engine.
|
|
Use 'read' to read a page. You can only read 10 pages total, so try to only pick high quality pages.
|
|
Additionally, you can use 'next_page' to load more results. You can only use next_page 3 times total.
|
|
You can read multiple pages at once, or read one page and continue to the next page if you need more information.
|
|
But if you are confident in your answer, you can use 'answer' to provide the answer.
|
|
Or you can use 'give_up' to indicate that you cannot find an answer and give up.`,
|
|
},
|
|
{
|
|
Role: gollm.RoleSystem,
|
|
Text: "The question you are trying to answer is: " + q.Question,
|
|
},
|
|
{
|
|
Role: gollm.RoleSystem,
|
|
Text: "The search terms you used were: " + searchTerm,
|
|
},
|
|
{
|
|
Role: gollm.RoleSystem,
|
|
Text: `The search results will be provided by the user in json format of: {"url": "https://url.here", "title": "Title Of Page", "description": "description here"}`,
|
|
},
|
|
},
|
|
}
|
|
|
|
addMessages := func(results []duckduckgo.Result) {
|
|
type searchResults struct {
|
|
Url string `json:"url"`
|
|
Title string `json:"title"`
|
|
Desc string `json:"description"`
|
|
}
|
|
for _, r := range results {
|
|
b, _ := json.Marshal(&searchResults{Url: r.URL, Title: r.Title, Desc: r.Description})
|
|
req.Messages = append(req.Messages, gollm.Message{
|
|
Role: gollm.RoleUser,
|
|
Text: string(b),
|
|
})
|
|
}
|
|
}
|
|
|
|
fnRead := gollm.NewFunction(
|
|
"read",
|
|
`Read a page from the search results. The results will be in the JSON format of: {"url": "https://url.here", "answer": "answer here"}`,
|
|
func(ctx *gollm.Context, args struct {
|
|
URL string `description:"the url to read"`
|
|
}) (string, error) {
|
|
if totalRead >= kMaxReads {
|
|
return "you have read the maximum number of pages", nil
|
|
}
|
|
|
|
totalRead += 1
|
|
|
|
u, err := url.Parse(args.URL)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to parse url: %w", err)
|
|
}
|
|
|
|
a, err := extractArticle(ctx, q.Cache, u)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to extract article: %w", err)
|
|
}
|
|
|
|
if a.Title == "" || a.Body == "" {
|
|
return "couldn't read the page", nil
|
|
}
|
|
|
|
answer, err := doesTextAnswerQuestion(ctx, q, a.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to check if text answers question: %w", err)
|
|
}
|
|
|
|
var res = searchResults{
|
|
Url: u.String(),
|
|
Answer: answer,
|
|
}
|
|
|
|
return res.String()
|
|
})
|
|
|
|
fnNextPage := gollm.NewFunction(
|
|
"next_page",
|
|
"Load more results from the search engine.",
|
|
func(ctx *gollm.Context, args struct {
|
|
Ignored string `description:"ignored, just here to make sure the function is called. Fill with anything."`
|
|
}) (string, error) {
|
|
if totalNextPage >= kMaxLoadMore {
|
|
return "you have loaded the maximum number of pages", nil
|
|
}
|
|
|
|
totalNextPage += 1
|
|
|
|
err := page.LoadMore()
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to load more results: %w", err)
|
|
}
|
|
|
|
time.Sleep(4 * time.Second)
|
|
|
|
results := page.GetResults()
|
|
|
|
// only add the new results here...
|
|
filteredResults := filterResults(results)
|
|
oldResults = append(oldResults, filteredResults...)
|
|
addMessages(filteredResults)
|
|
return "ok", nil
|
|
})
|
|
|
|
fnAnswer := gollm.NewFunction(
|
|
"answer",
|
|
"Provide the answer to the question.",
|
|
func(ctx *gollm.Context, args struct {
|
|
Answer string `description:"the answer to the question"`
|
|
Sources []string `description:"the urls of sources used to find the answer"`
|
|
}) (string, error) {
|
|
res.Answer = args.Answer
|
|
res.Urls = args.Sources
|
|
giveup = true
|
|
return "ok", nil
|
|
})
|
|
|
|
fnGiveUp := gollm.NewFunction(
|
|
"give_up",
|
|
"Indicate that you cannot find an answer and give up.",
|
|
func(ctx *gollm.Context, args struct {
|
|
Ignored string `description:"ignored, just here to make sure the function is called. Fill with anything."`
|
|
}) (string, error) {
|
|
giveup = true
|
|
return "ok", nil
|
|
})
|
|
|
|
// do initial load of results
|
|
results := page.GetResults()
|
|
filteredResults := filterResults(results)
|
|
oldResults = append(oldResults, filteredResults...)
|
|
addMessages(filteredResults)
|
|
|
|
var i = 0
|
|
for ; i < kMaxLoops && !giveup; i++ {
|
|
// figure out my allowed tools, based on limits
|
|
var tools = []*gollm.Function{
|
|
fnAnswer,
|
|
fnGiveUp,
|
|
}
|
|
|
|
if totalRead < kMaxReads {
|
|
tools = append(tools, fnRead)
|
|
}
|
|
|
|
if totalNextPage < kMaxLoadMore {
|
|
tools = append(tools, fnNextPage)
|
|
}
|
|
|
|
req.Toolbox = gollm.NewToolBox(tools...)
|
|
|
|
res, err := q.Model.ChatComplete(ctx, req)
|
|
if err != nil {
|
|
return searchResults2{}, fmt.Errorf("failed to chat complete: %w", err)
|
|
}
|
|
|
|
if len(res.Choices) == 0 {
|
|
break
|
|
}
|
|
|
|
if len(res.Choices[0].Calls) == 0 {
|
|
break
|
|
}
|
|
|
|
_, err = req.Toolbox.Execute(ctx, res.Choices[0].Calls[0])
|
|
if err != nil {
|
|
return searchResults2{}, fmt.Errorf("failed to execute: %w", err)
|
|
}
|
|
}
|
|
|
|
if giveup {
|
|
return res, fmt.Errorf("gave up: no relevant results found")
|
|
}
|
|
|
|
if res.Answer == "" {
|
|
return res, fmt.Errorf("no answer found")
|
|
}
|
|
|
|
return res, nil
|
|
}
|