From 98fa840f8726ecc02109eab9b36fd90dc041069d Mon Sep 17 00:00:00 2001
From: Steve Dudenhoeffer
Date: Fri, 8 Nov 2024 20:51:12 -0500
Subject: [PATCH] initial commit

---
Amended in review: the blob ids on the "index" lines and the commit id on the
"From" line still describe the original commit, not the content below.

 .gitignore             |   0
 cmd/answer.go          |  45 +++++
 go.mod                 |  60 ++++++
 pkg/answer/answer.go   | 412 +++++++++++++++++++++++++++++++++++++++++
 pkg/cache/cache.go     |  24 +++
 pkg/cache/directory.go | 192 +++++++++++++++++++
 pkg/search/google.go   |  49 +++++
 pkg/search/search.go   |  16 ++
 8 files changed, 798 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 cmd/answer.go
 create mode 100644 go.mod
 create mode 100644 pkg/answer/answer.go
 create mode 100644 pkg/cache/cache.go
 create mode 100644 pkg/cache/directory.go
 create mode 100644 pkg/search/google.go
 create mode 100644 pkg/search/search.go

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/cmd/answer.go b/cmd/answer.go
new file mode 100644
index 0000000..a780fc2
--- /dev/null
+++ b/cmd/answer.go
@@ -0,0 +1,45 @@
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/urfave/cli"
+)
+
+// main builds the "answer" command-line application and runs it against the
+// process arguments.
+func main() {
+	// Usage: go run cmd/answer.go question...
+	// - flags:
+	//   --model=[model string such as openai/gpt-4o, anthropic/claude..., google/gemini-1.5. Default: openai/gpt-4o]
+	//   --search-provider=[search provider string such as google, duckduckgo. Default: google]
+	// NOTE: these flags are not defined on the app yet; only positional arguments are read.
+
+	var app = cli.App{
+		Name:        "answer",
+		Usage:       "has an llm search the web for you to answer a question",
+		Version:     "0.1",
+		Description: "",
+
+		Action: func(c *cli.Context) error {
+			// if there is no question to answer, print usage
+			if c.NArg() == 0 {
+				return cli.ShowAppHelp(c)
+			}
+
+			// get the question
+			fmt.Println("Head: ", c.Args().First())
+			fmt.Println("Tail: ", c.Args().Tail())
+
+			return nil
+		},
+	}
+
+	// app.Run needs the argument vector; report any failure on stderr and
+	// through a non-zero exit status.
+	if err := app.Run(os.Args); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..fc5de73
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,60 @@
+module answer
+
+go 1.23.2
+
+replace gitea.stevedudenhoeffer.com/steve/go-llm => ../go-llm
+
+require (
+	cloud.google.com/go v0.115.0 // indirect
+	cloud.google.com/go/ai v0.8.0 // indirect
+	cloud.google.com/go/auth v0.6.0 // indirect
+	cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
+	cloud.google.com/go/compute/metadata v0.3.0 // indirect
+	cloud.google.com/go/longrunning v0.5.7 // indirect
+	gitea.stevedudenhoeffer.com/steve/go-llm v0.0.0-20241031152103-f603010dee49 // indirect
+	github.com/PuerkitoBio/goquery v1.8.1 // indirect
+	github.com/andybalholm/cascadia v1.3.2 // indirect
+	github.com/antchfx/htmlquery v1.3.0 // indirect
+	github.com/antchfx/xmlquery v1.3.15 // indirect
+	github.com/antchfx/xpath v1.2.4 // indirect
+	github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
+	github.com/felixge/httpsnoop v1.0.4 // indirect
+	github.com/go-logr/logr v1.4.1 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/gobwas/glob v0.2.3 // indirect
+	github.com/gocolly/colly/v2 v2.1.0 // indirect
+	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
+	github.com/golang/protobuf v1.5.4 // indirect
+	github.com/google/generative-ai-go v0.18.0 // indirect
+	github.com/google/s2a-go v0.1.7 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
+	github.com/googleapis/gax-go/v2 v2.12.5 // indirect
+	github.com/kennygrant/sanitize v1.2.4 // indirect
+	github.com/liushuangls/go-anthropic/v2 v2.8.0 // indirect
+	github.com/rocketlaunchr/google-search v1.1.6 // indirect
+	github.com/russross/blackfriday/v2 v2.1.0 // indirect
+	github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect
+	github.com/sashabaranov/go-openai v1.31.0 // indirect
+	github.com/temoto/robotstxt v1.1.2 // indirect
+	github.com/urfave/cli v1.22.16 // indirect
+	go.opencensus.io v0.24.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 // indirect
+	go.opentelemetry.io/otel v1.26.0 // indirect
+	go.opentelemetry.io/otel/metric v1.26.0 // indirect
+	go.opentelemetry.io/otel/trace v1.26.0 // indirect
+	golang.org/x/crypto v0.24.0 // indirect
+	golang.org/x/net v0.26.0 // indirect
+	golang.org/x/oauth2 v0.21.0 // indirect
+	golang.org/x/sync v0.7.0 // indirect
+	golang.org/x/sys v0.21.0 // indirect
+	golang.org/x/text v0.16.0 // indirect
+	golang.org/x/time v0.5.0 // indirect
+	google.golang.org/api v0.186.0 // indirect
+	google.golang.org/appengine v1.6.8 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240617180043-68d350f18fd4 // indirect
+	google.golang.org/grpc v1.64.1 // indirect
+	google.golang.org/protobuf v1.34.2 // indirect
+)
diff --git a/pkg/answer/answer.go b/pkg/answer/answer.go
new file mode 100644
index 0000000..3ec7700
--- /dev/null
+++ b/pkg/answer/answer.go
@@ -0,0 +1,412 @@
+package answer
+
+import (
+	"answer/pkg/cache"
+	"answer/pkg/search"
+	"context"
+	"errors"
+	"fmt"
+	go_llm "gitea.stevedudenhoeffer.com/steve/go-llm"
+	"html"
+	"io"
+	"log/slog"
+	"net/http"
+	"net/url"
+	"regexp"
+	"strings"
+)
+
+// Sentinel errors for the limits described on Options. Nothing in this file
+// returns them yet.
+var ErrMaxTries = errors.New("maximum number of pages tried reached")
+var ErrMaxAnswers = errors.New("maximum number of answers parsed reached")
+var ErrTooManyArguments = errors.New("too many arguments")
+
+// maxArticleBytes caps how much of a fetched page extractArticle reads.
+const maxArticleBytes = 4 << 20
+
+// titlePattern captures the contents of the first <title> element of a page.
+var titlePattern = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
+
+// Question bundles a question with the collaborators used to answer it.
+type Question struct {
+	// Question is the question to answer
+	Question string
+
+	// Model is the chat model that answers or picks a tool to call.
+	Model go_llm.ChatCompletion
+
+	// Search is the web search provider behind the search tool.
+	Search search.Search
+
+	// Cache travels with the question; nothing in this file reads it yet.
+	Cache cache.Cache
+}
+
+// Answers is a list of answers to a question
+type Answers []string
+
+// Options bounds the work done while answering a question.
+type Options struct {
+	// MaxSearches is the maximum possible number of searches to execute for this question. If this is set to 5, the function could
+	// search up to 5 possible times to find an answer.
+	MaxSearches int
+
+	// MaxThinks is the maximum number of times to think about a question. A "Think" is different than a search in that
+	// the LLM just breaks the question down into smaller parts and tries to answer them. This is useful for complex
+	// questions that are hard to answer since LLMs are better at answering smaller questions.
+	MaxThinks int
+
+	// MaxTries is the absolute maximum number of pages to try to get an answer from. For instance, if MaxTries is 5 and
+	// 5 pages are tried and no answers are found, the function will return ErrMaxTries.
+	MaxTries int
+}
+
+// DefaultOptions is the configuration used by the package-level Answer.
+var DefaultOptions = Options{
+	MaxSearches: 5,
+	MaxThinks:   10,
+	MaxTries:    5,
+}
+
+// Result is the outcome of one tool call: its output, or the error it produced.
+type Result struct {
+	Result string
+	Error  error
+}
+
+// fanExecuteToolCalls runs every call concurrently against toolBox and returns
+// one Result per call, in completion order rather than input order.
+func fanExecuteToolCalls(ctx context.Context, toolBox *go_llm.ToolBox, calls []go_llm.ToolCall) []Result {
+	var results []Result
+	var resultsOutput = make(chan Result, len(calls))
+
+	fnCall := func(call go_llm.ToolCall) Result {
+		str, err := toolBox.Execute(ctx, call)
+		if err != nil {
+			return Result{
+				Error: err,
+			}
+		}
+
+		return Result{
+			Result: str,
+		}
+	}
+
+	for _, call := range calls {
+		go func(call go_llm.ToolCall) {
+			resultsOutput <- fnCall(call)
+		}(call)
+	}
+
+	for i := 0; i < len(calls); i++ {
+		result := <-resultsOutput
+		results = append(results, result)
+	}
+
+	close(resultsOutput)
+
+	return results
+}
+
+// article is the content pulled from a single web page.
+type article struct {
+	URL   string
+	Title string
+	Body  string
+}
+
+// extractArticle downloads the page at u and returns its URL, title and body.
+//
+// The request is bound to ctx, and any status outside 2xx is an error.
+// TODO: Body is the raw markup for now; replace it with readable text once an
+// article extractor is added to the module.
+func extractArticle(ctx context.Context, u *url.URL) (article, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
+	if err != nil {
+		return article{}, fmt.Errorf("error creating request: %w", err)
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return article{}, fmt.Errorf("error getting response: %w", err)
+	}
+
+	defer func(body io.ReadCloser) {
+		if err := body.Close(); err != nil {
+			slog.Error("error closing body", "error", err)
+		}
+	}(resp.Body)
+
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return article{}, fmt.Errorf("bad response: %d: %s", resp.StatusCode, resp.Status)
+	}
+
+	// Bound the read so a huge or endless response cannot exhaust memory.
+	b, err := io.ReadAll(io.LimitReader(resp.Body, maxArticleBytes))
+	if err != nil {
+		return article{}, fmt.Errorf("error reading body: %w", err)
+	}
+
+	res := article{URL: u.String(), Body: string(b)}
+	if m := titlePattern.FindSubmatch(b); m != nil {
+		res.Title = strings.TrimSpace(html.UnescapeString(string(m[1])))
+	}
+
+	return res, nil
+}
+
+// doesTextAnswerQuestion asks the model whether text answers q.Question. It
+// returns the answer the model extracted, or "" when the model reports no
+// answer or makes no tool call.
+func doesTextAnswerQuestion(ctx context.Context, q Question, text string) (string, error) {
+	fnAnswer := go_llm.NewFunction(
+		"answer",
+		"The answer from the given text that answers the question.",
+		func(ctx context.Context, args struct {
+			Answer string `description:"the answer to the question, the answer should come from the text"`
+		}) (string, error) {
+			return args.Answer, nil
+		})
+
+	fnNoAnswer := go_llm.NewFunction(
+		"no_answer",
+		"Indicate that the text does not answer the question.",
+		func(ctx context.Context, args struct{}) (string, error) {
+			return "", nil
+		})
+
+	req := go_llm.Request{
+		Messages: []go_llm.Message{
+			{
+				Role: go_llm.RoleSystem,
+				Text: "Evaluate the given text to see if it answers the question from the user. The text is as follows:",
+			},
+			{
+				Role: go_llm.RoleSystem,
+				Text: text,
+			},
+			{
+				Role: go_llm.RoleUser,
+				Text: q.Question,
+			},
+		},
+		Toolbox: go_llm.NewToolBox(fnAnswer, fnNoAnswer),
+	}
+
+	res, err := q.Model.ChatComplete(ctx, req)
+
+	if err != nil {
+		return "", err
+	}
+
+	if len(res.Choices) == 0 {
+		return "", nil
+	}
+
+	if len(res.Choices[0].Calls) == 0 {
+		return "", nil
+	}
+
+	return req.Toolbox.Execute(ctx, res.Choices[0].Calls[0])
+}
+
+// functionSearch runs searchTerm through q.Search and returns the first answer
+// to q.Question found in a result page, or "" when no page yields one.
+func functionSearch(ctx context.Context, q Question, searchTerm string) (string, error) {
+	res, err := q.Search.Search(ctx, searchTerm)
+	if err != nil {
+		return "", err
+	}
+
+	if len(res) == 0 {
+		return "", nil
+	}
+
+	// first pass try to see if any provide the result without needing archive
+	for _, r := range res {
+		trimmed := strings.TrimSpace(r.URL)
+		if trimmed == "" {
+			// nothing to fetch for a result without a URL
+			continue
+		}
+		u, err := url.Parse(trimmed)
+		if err != nil {
+			continue
+		}
+
+		a, err := extractArticle(ctx, u)
+
+		if err != nil {
+			continue
+		}
+
+		if a.Title != "" && a.Body != "" {
+			answer, err := doesTextAnswerQuestion(ctx, q, a.Body)
+
+			if err != nil {
+				slog.Error("error checking if text answers question", "question", q.Question, "error", err)
+				continue
+			}
+
+			if answer != "" {
+				return answer, nil
+			}
+		}
+	}
+
+	return "", nil
+}
+
+// functionThink asks the model to answer q.Question directly, without any web
+// search. It returns "" when the model makes no tool call.
+func functionThink(ctx context.Context, q Question) (string, error) {
+	fnAnswer := go_llm.NewFunction(
+		"answer",
+		"Answer the question.",
+		func(ctx context.Context, args struct {
+			Answer string `description:"the answer to the question"`
+		}) (string, error) {
+			return args.Answer, nil
+		})
+
+	var temp float32 = 0.8
+	req := go_llm.Request{
+		Messages: []go_llm.Message{
+			{
+				Role: go_llm.RoleSystem,
+				Text: "Answer the given question as accurately and concisely as possible using the answer function.",
+			},
+			{
+				Role: go_llm.RoleUser,
+				Text: q.Question,
+			},
+		},
+		Toolbox:     go_llm.NewToolBox(fnAnswer),
+		Temperature: &temp,
+	}
+
+	res, err := q.Model.ChatComplete(ctx, req)
+
+	if err != nil {
+		return "", err
+	}
+
+	if len(res.Choices) == 0 {
+		return "", nil
+	}
+
+	if len(res.Choices[0].Calls) == 0 {
+		return "", nil
+	}
+
+	return req.Toolbox.Execute(ctx, res.Choices[0].Calls[0])
+}
+
+// Answer asks q.Model to answer q.Question, offering it the answer, search and
+// think tools (the latter two only when their limits are positive), then runs
+// the tool calls the model chose and returns every non-empty result.
+//
+// A failed tool call is logged and skipped; it does not fail the whole request.
+func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
+	fnSearch := go_llm.NewFunction(
+		"search",
+		"Search the web for an answer to a question. You can call this function up to "+fmt.Sprint(o.MaxSearches)+" times.",
+		func(ctx context.Context, args struct {
+			SearchQuery string `description:"what to search the web for for this question"`
+			Question    string `description:"what question(s) you are trying to answer with this search"`
+		}) (string, error) {
+			q2 := q
+			q2.Question = args.Question
+
+			return functionSearch(ctx, q2, args.SearchQuery)
+		})
+
+	fnThink := go_llm.NewFunction(
+		"think",
+		"Think about a question. This is useful for breaking down complex questions into smaller parts that are easier to answer.",
+		func(ctx context.Context, args struct {
+			Question string `description:"the question to think about"`
+		}) (string, error) {
+			q2 := q
+			q2.Question = args.Question
+
+			return functionThink(ctx, q2)
+		})
+
+	fnAnswer := go_llm.NewFunction(
+		"answer",
+		"You definitively answer a question, if you call this it means you know the answer and do not need to search for it or use any other function to find it",
+		func(ctx context.Context, args struct {
+			Answer string `description:"the answer to the question"`
+		}) (string, error) {
+			return args.Answer, nil
+		})
+
+	var funcs = []*go_llm.Function{fnAnswer}
+
+	if o.MaxSearches > 0 {
+		funcs = append(funcs, fnSearch)
+	}
+
+	if o.MaxThinks > 0 {
+		funcs = append(funcs, fnThink)
+	}
+
+	var temp float32 = 0.8
+
+	req := go_llm.Request{
+		Messages: []go_llm.Message{
+			{
+				Role: go_llm.RoleSystem,
+				Text: "You are being asked to answer a question. You must respond with a function. You can answer it if you know the answer, or if some functions exist you can use those to help you find the answer.",
+			},
+			{
+				Role: go_llm.RoleUser,
+				Text: q.Question,
+			},
+		},
+		Toolbox:     go_llm.NewToolBox(funcs...),
+		Temperature: &temp,
+	}
+
+	res, err := q.Model.ChatComplete(ctx, req)
+
+	if err != nil {
+		return nil, err
+	}
+
+	if len(res.Choices) == 0 {
+		return nil, nil
+	}
+
+	// Only cap the choices when a cap is configured; slicing to a zero
+	// MaxSearches would throw away direct answers as well.
+	choices := res.Choices
+	if o.MaxSearches > 0 && len(choices) > o.MaxSearches {
+		choices = choices[:o.MaxSearches]
+	}
+
+	var answers Answers
+
+	for _, choice := range choices {
+		for _, r := range fanExecuteToolCalls(ctx, req.Toolbox, choice.Calls) {
+			if r.Error != nil {
+				slog.Error("error executing tool call", "question", q.Question, "error", r.Error)
+				continue
+			}
+
+			if r.Result != "" {
+				answers = append(answers, r.Result)
+			}
+		}
+	}
+
+	return answers, nil
+}
+
+// Answer answers q using DefaultOptions.
+func Answer(ctx context.Context, q Question) (Answers, error) {
+	return DefaultOptions.Answer(ctx, q)
+}
diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go
new file mode 100644
index 0000000..4aaa686
--- /dev/null
+++ b/pkg/cache/cache.go
@@ -0,0 +1,24 @@
+package cache
+
+import "io"
+
+// Cache is a key/value store whose values can be streamed, or read and
+// written as strings or JSON.
+type Cache interface {
+	// Get writes the value stored under key to writer.
+	Get(key string, writer io.Writer) error
+	// GetString returns the value stored under key as a string.
+	GetString(key string) (string, error)
+	// GetJSON decodes the JSON value stored under key into value.
+	GetJSON(key string, value any) error
+
+	// Set stores everything read from value under key.
+	Set(key string, value io.Reader) error
+	// SetJSON stores value under key, encoded as JSON.
+	SetJSON(key string, value any) error
+	// SetString stores the string value under key.
+	SetString(key string, value string) error
+
+	// Delete removes the value stored under key.
+	Delete(key string) error
+}
diff --git a/pkg/cache/directory.go b/pkg/cache/directory.go
new file mode 100644
index 0000000..c937951
--- /dev/null
+++ b/pkg/cache/directory.go
@@ -0,0 +1,192 @@
+package cache
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"io"
+	"os"
+	"path/filepath"
+	"sync"
+	"time"
+)
+
+// Directory is a Cache that keeps every entry in its own file, named
+// <key>.json, inside BaseFolder.
+type Directory struct {
+	// BaseFolder is the directory that holds the cache files.
+	BaseFolder string
+	// MaxLife is how old a file may get before Cleanup removes it, and the
+	// pause between two AutoCleanupRoutine passes.
+	MaxLife time.Duration
+
+	lock sync.Mutex
+}
+
+var _ Cache = &Directory{}
+
+// GetPath returns the path of the file that stores key.
+func (d *Directory) GetPath(key string) string {
+	return filepath.Join(d.BaseFolder, key+".json")
+}
+
+// Cleanup removes every .json file under BaseFolder that was last modified
+// more than MaxLife ago.
+func (d *Directory) Cleanup(_ context.Context) error {
+	d.lock.Lock()
+	defer d.lock.Unlock()
+
+	// go through the BaseFolder looking for any files that are older than MaxLife
+	return filepath.Walk(d.BaseFolder, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+
+		// ignore directories
+		if info.IsDir() {
+			return nil
+		}
+
+		// only files that end in .json
+		if filepath.Ext(path) != ".json" {
+			return nil
+		}
+
+		// if the file is older than MaxLife, delete it
+		if time.Since(info.ModTime()) > d.MaxLife {
+			return os.Remove(path)
+		}
+
+		return nil
+	})
+}
+
+// AutoCleanupRoutine will continually loop and cleanup the directory, until the context is cancelled or an error occurs
+// returns nil on context cancellation, or an error if one occurs during cleanup
+func (d *Directory) AutoCleanupRoutine(ctx context.Context) error {
+	for {
+		select {
+		case <-ctx.Done():
+			return nil
+
+		case <-time.After(d.MaxLife):
+			err := d.Cleanup(ctx)
+			if err != nil {
+				return err
+			}
+		}
+	}
+}
+
+// openFile opens the file that stores key for reading.
+func (d *Directory) openFile(key string) (*os.File, error) {
+	path := d.GetPath(key)
+
+	return os.Open(path)
+}
+
+// createFile creates, or truncates, the file that stores key and opens it for
+// writing. The parent directory is created first when it is missing.
+func (d *Directory) createFile(key string) (*os.File, error) {
+	path := d.GetPath(key)
+
+	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+		return nil, err
+	}
+
+	return os.Create(path)
+}
+
+// Set stores everything read from value under key, replacing any previous value.
+func (d *Directory) Set(key string, value io.Reader) (err error) {
+	d.lock.Lock()
+	defer d.lock.Unlock()
+
+	fp, err := d.createFile(key)
+	if err != nil {
+		return err
+	}
+
+	// A failed Close can mean the data never reached the disk, so report it
+	// unless an earlier error is already on its way out.
+	defer func() {
+		if cerr := fp.Close(); err == nil {
+			err = cerr
+		}
+	}()
+
+	_, err = io.Copy(fp, value)
+
+	return err
+}
+
+// SetJSON stores value under key encoded as JSON, replacing any previous value.
+func (d *Directory) SetJSON(key string, value any) (err error) {
+	d.lock.Lock()
+	defer d.lock.Unlock()
+
+	fp, err := d.createFile(key)
+	if err != nil {
+		return err
+	}
+
+	// See Set: a Close failure is only reported when nothing failed before it.
+	defer func() {
+		if cerr := fp.Close(); err == nil {
+			err = cerr
+		}
+	}()
+
+	return json.NewEncoder(fp).Encode(value)
+}
+
+// SetString stores the string value under key.
+func (d *Directory) SetString(key, value string) error {
+	return d.Set(key, bytes.NewReader([]byte(value)))
+}
+
+// Get copies the value stored under key to w.
+func (d *Directory) Get(key string, w io.Writer) error {
+	d.lock.Lock()
+	defer d.lock.Unlock()
+
+	fp, err := d.openFile(key)
+	if err != nil {
+		return err
+	}
+	defer fp.Close()
+
+	_, err = io.Copy(w, fp)
+	return err
+}
+
+// GetJSON decodes the JSON stored under key into v, which must be a pointer.
+func (d *Directory) GetJSON(key string, v any) error {
+	d.lock.Lock()
+	defer d.lock.Unlock()
+
+	fp, err := d.openFile(key)
+	if err != nil {
+		return err
+	}
+
+	defer fp.Close()
+
+	return json.NewDecoder(fp).Decode(v)
+}
+
+// GetString returns the value stored under key as a string.
+func (d *Directory) GetString(key string) (string, error) {
+	var buf bytes.Buffer
+
+	err := d.Get(key, &buf)
+	return buf.String(), err
+}
+
+// Delete removes the file that stores key.
+func (d *Directory) Delete(key string) error {
+	d.lock.Lock()
+	defer d.lock.Unlock()
+
+	return os.Remove(d.GetPath(key))
+}
diff --git a/pkg/search/google.go b/pkg/search/google.go
new file mode 100644
index 0000000..41cba52
--- /dev/null
+++ b/pkg/search/google.go
@@ -0,0 +1,49 @@
+package search
+
+import (
+	"context"
+	googlesearch "github.com/rocketlaunchr/google-search"
+	"sort"
+)
+
+// Google is a Search backed by the rocketlaunchr/google-search package.
+type Google struct {
+}
+
+var _ Search = Google{}
+
+// Search runs search with zero-valued library options and returns the
+// results ordered by rank.
+func (Google) Search(ctx context.Context, search string) ([]Result, error) {
+	res, err := googlesearch.Search(ctx, search, googlesearch.SearchOptions{
+		CountryCode:    "",
+		LanguageCode:   "",
+		Limit:          0,
+		Start:          0,
+		UserAgent:      "",
+		OverLimit:      false,
+		ProxyAddr:      "",
+		FollowNextPage: false,
+	})
+
+	if err != nil {
+		return nil, err
+	}
+
+	var results []Result
+
+	// just in case, sort the res by rank, as the api does not mention it is sorted
+	sort.Slice(res, func(i, j int) bool {
+		return res[i].Rank < res[j].Rank
+	})
+
+	for _, r := range res {
+		results = append(results, Result{
+			Title:       r.Title,
+			URL:         r.URL,
+			Description: r.Description,
+		})
+	}
+
+	return results, nil
+}
diff --git a/pkg/search/search.go b/pkg/search/search.go
new file mode 100644
index 0000000..49a4bd7
--- /dev/null
+++ b/pkg/search/search.go
@@ -0,0 +1,16 @@
+package search
+
+import "context"
+
+// Result is a single hit returned by a Search.
+type Result struct {
+	Title       string
+	URL         string
+	Description string
+}
+
+// Search is implemented by web search providers.
+type Search interface {
+	// Search returns the results found for query.
+	Search(ctx context.Context, query string) ([]Result, error)
+}