initial commit

This commit is contained in:
Steve Dudenhoeffer 2024-11-08 20:51:12 -05:00
commit 98fa840f87
8 changed files with 739 additions and 0 deletions

0
.gitignore vendored Normal file
View File

36
cmd/answer.go Normal file
View File

@ -0,0 +1,36 @@
package main
import (
	"fmt"
	"os"

	"github.com/urfave/cli"
)
// main builds the "answer" command line application and runs it against the
// process arguments. Any error returned by the app is printed to stderr and
// the process exits with status 1.
//
// Usage: go run cmd/answer.go question...
//
// Planned flags (described here, but not yet registered on the app):
//   - --model=[model string such as openai/gpt-4o, anthropic/claude..., google/gemini-1.5. Default: openai/gpt-4o]
//   - --search-provider=[search provider string such as google, duckduckgo. Default: google]
func main() {
	app := cli.App{
		Name:        "answer",
		Usage:       "has an llm search the web for you to answer a question",
		Version:     "0.1",
		Description: "",
		Action: func(c *cli.Context) error {
			// if there is no question to answer, print usage
			if c.NArg() == 0 {
				return cli.ShowAppHelp(c)
			}

			// get the question
			fmt.Println("Head: ", c.Args().First())
			fmt.Println("Tail: ", c.Args().Tail())
			return nil
		},
	}

	// Run needs the raw process arguments and reports failures through its
	// return value, so surface them instead of dropping them.
	if err := app.Run(os.Args); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

60
go.mod Normal file
View File

@ -0,0 +1,60 @@
module answer
go 1.23.2
replace gitea.stevedudenhoeffer.com/steve/go-llm => ../go-llm
require (
cloud.google.com/go v0.115.0 // indirect
cloud.google.com/go/ai v0.8.0 // indirect
cloud.google.com/go/auth v0.6.0 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
cloud.google.com/go/compute/metadata v0.3.0 // indirect
cloud.google.com/go/longrunning v0.5.7 // indirect
gitea.stevedudenhoeffer.com/steve/go-llm v0.0.0-20241031152103-f603010dee49
github.com/PuerkitoBio/goquery v1.8.1 // indirect
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/antchfx/htmlquery v1.3.0 // indirect
github.com/antchfx/xmlquery v1.3.15 // indirect
github.com/antchfx/xpath v1.2.4 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/gobwas/glob v0.2.3 // indirect
github.com/gocolly/colly/v2 v2.1.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/generative-ai-go v0.18.0 // indirect
github.com/google/s2a-go v0.1.7 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/gax-go/v2 v2.12.5 // indirect
github.com/kennygrant/sanitize v1.2.4 // indirect
github.com/liushuangls/go-anthropic/v2 v2.8.0 // indirect
github.com/rocketlaunchr/google-search v1.1.6
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect
github.com/sashabaranov/go-openai v1.31.0 // indirect
github.com/temoto/robotstxt v1.1.2 // indirect
github.com/urfave/cli v1.22.16
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 // indirect
go.opentelemetry.io/otel v1.26.0 // indirect
go.opentelemetry.io/otel/metric v1.26.0 // indirect
go.opentelemetry.io/otel/trace v1.26.0 // indirect
golang.org/x/crypto v0.24.0 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/time v0.5.0 // indirect
google.golang.org/api v0.186.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240617180043-68d350f18fd4 // indirect
google.golang.org/grpc v1.64.1 // indirect
google.golang.org/protobuf v1.34.2 // indirect
)

411
pkg/answer/answer.go Normal file
View File

@ -0,0 +1,411 @@
package answer
import (
"answer/pkg/cache"
"answer/pkg/search"
"context"
"errors"
"fmt"
go_llm "gitea.stevedudenhoeffer.com/steve/go-llm"
"io"
"log/slog"
"net/http"
"net/url"
"strings"
)
// Sentinel errors exported by this package.
var (
	// ErrMaxTries signals that the maximum number of pages has been tried.
	ErrMaxTries = errors.New("maximum number of pages tried reached")

	// ErrMaxAnswers signals that the maximum number of answers has been parsed.
	ErrMaxAnswers = errors.New("maximum number of answers parsed reached")

	// ErrTooManyArguments signals that too many arguments were supplied.
	ErrTooManyArguments = errors.New("too many arguments")
)
// Question bundles the text of a question with the collaborators used to
// answer it.
type Question struct {
// Question is the question to answer
Question string
// Model is the chat-completion backend; ChatComplete is called on it to
// obtain tool calls.
Model go_llm.ChatCompletion
// Search is the web search provider queried by functionSearch.
Search search.Search
// Cache is a cache handle.
// NOTE(review): no code visible in this file reads or writes it yet.
Cache cache.Cache
}
// Answers is the list of answer strings for a question; it is the result type
// of Options.Answer and the package-level Answer function.
type Answers []string
// Options tunes how Options.Answer goes about answering a question.
type Options struct {
// MaxSearches is the intended maximum number of searches to execute for a
// question. In Options.Answer the search tool is only offered to the model
// when this is greater than zero, the value is quoted in that tool's
// description, and it is also used to cap how many response choices are kept.
// NOTE(review): no visible code counts searches against this limit.
MaxSearches int
// MaxThinks is the intended maximum number of times to think about a question. A "Think" is different from a search in that
// the LLM just breaks the question down into smaller parts and tries to answer them. This is useful for complex
// questions that are hard to answer, since LLMs are better at answering smaller questions.
// In Options.Answer the think tool is only offered to the model when this is greater than zero.
// NOTE(review): no visible code counts thinks against this limit.
MaxThinks int
// MaxTries is the intended absolute maximum number of pages to try to get an answer from, after which
// ErrMaxTries is meant to be returned.
// NOTE(review): no visible code reads this field or returns ErrMaxTries yet.
MaxTries int
}
// DefaultOptions is the Options value used by the package-level Answer
// function.
var DefaultOptions = Options{
MaxSearches: 5,
MaxThinks: 10,
MaxTries: 5,
}
// Result is the outcome of executing a single tool call, as produced by
// fanExecuteToolCalls.
type Result struct {
// Result is the string returned by the tool; it is left empty when Error is set.
Result string
// Error is the error returned by the tool, or nil when the call succeeded.
Error error
}
// fanExecuteToolCalls executes every call in calls concurrently against
// toolBox and waits for all of them to finish.
//
// It returns one Result per call, and results[i] always belongs to calls[i],
// regardless of the order in which the calls complete. A failed call is
// reported through Result.Error rather than aborting the others. A nil slice
// is returned when calls is empty.
func fanExecuteToolCalls(ctx context.Context, toolBox *go_llm.ToolBox, calls []go_llm.ToolCall) []Result {
	if len(calls) == 0 {
		return nil
	}

	// indexed carries the position of the originating call alongside its
	// outcome so results can be slotted back in call order.
	type indexed struct {
		idx int
		res Result
	}

	// Buffered to len(calls) so no goroutine can block on send.
	out := make(chan indexed, len(calls))

	for i, call := range calls {
		go func(i int, call go_llm.ToolCall) {
			str, err := toolBox.Execute(ctx, call)
			if err != nil {
				out <- indexed{idx: i, res: Result{Error: err}}
				return
			}
			out <- indexed{idx: i, res: Result{Result: str}}
		}(i, call)
	}

	results := make([]Result, len(calls))
	for range calls {
		r := <-out
		results[r.idx] = r.res
	}

	return results
}
// article is the content pulled from a fetched web page.
type article struct {
	// URL is the address the page was requested from.
	URL string
	// Title is the trimmed text of the page's first <title> element, or ""
	// when the page has none.
	Title string
	// Body is the response body as received (raw markup, not cleaned text).
	Body string
}

// extractArticle fetches u with an HTTP GET bound to ctx and returns the
// page's URL, title and body.
//
// Only 2xx responses are accepted; anything else yields an error. Any panic
// raised while fetching or parsing is converted into a returned error. On
// error the returned article is the zero value.
//
// NOTE(review): Body is the raw response body. A readability-style extractor
// (the original draft referenced one that is not imported by this file) should
// replace the plain title scan below once it is added as a dependency.
func extractArticle(ctx context.Context, u *url.URL) (res article, err error) {
	// maxArticleBytes bounds how much of an untrusted response is buffered.
	const maxArticleBytes = 10 << 20

	defer func() {
		e := recover()
		if e == nil {
			return
		}
		res = article{}
		if pe, ok := e.(error); ok {
			err = fmt.Errorf("panic: %w", pe)
		} else {
			err = fmt.Errorf("panic: %v", e)
		}
	}()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
	if err != nil {
		return article{}, fmt.Errorf("error creating request: %w", err)
	}

	// The request carries ctx, so cancellation and deadlines come from the caller.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return article{}, fmt.Errorf("error getting response: %w", err)
	}
	defer func() {
		if cerr := resp.Body.Close(); cerr != nil {
			slog.Error("error closing body", "error", cerr)
		}
	}()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return article{}, fmt.Errorf("bad response: %d: %s", resp.StatusCode, resp.Status)
	}

	b, err := io.ReadAll(io.LimitReader(resp.Body, maxArticleBytes))
	if err != nil {
		return article{}, fmt.Errorf("error reading body: %w", err)
	}

	raw := string(b)
	return article{
		URL:   u.String(),
		Title: htmlTitle(raw),
		Body:  raw,
	}, nil
}

// htmlTitle returns the trimmed text between the first "<title ...>" and the
// following "</title" in doc, matching the tag name case-insensitively. It
// returns "" when no complete title element is found.
func htmlTitle(doc string) string {
	const openTag, closeTag = "<title", "</title"

	start := indexTagFold(doc, openTag)
	if start < 0 {
		return ""
	}
	rest := doc[start+len(openTag):]

	// Skip any attributes up to the end of the opening tag.
	gt := strings.IndexByte(rest, '>')
	if gt < 0 {
		return ""
	}
	rest = rest[gt+1:]

	end := indexTagFold(rest, closeTag)
	if end < 0 {
		return ""
	}
	return strings.TrimSpace(rest[:end])
}

// indexTagFold returns the byte index of the first case-insensitive occurrence
// of tag in s, or -1. tag must be non-empty and start with a byte that has no
// case variants (such as '<'), which lets the scan jump between candidates.
func indexTagFold(s, tag string) int {
	for off := 0; off < len(s); {
		i := strings.IndexByte(s[off:], tag[0])
		if i < 0 {
			return -1
		}
		start := off + i
		end := start + len(tag)
		if end > len(s) {
			return -1
		}
		if strings.EqualFold(s[start:end], tag) {
			return start
		}
		off = start + 1
	}
	return -1
}
// doesTextAnswerQuestion asks q.Model whether text answers q.Question.
//
// The model is offered two tools: "answer", which hands back the answer it
// found in the text, and "no_answer", which hands back an empty string. The
// first tool call of the first choice is executed and its result returned.
// An empty string with a nil error is returned when the model produces no
// choices or no tool calls.
func doesTextAnswerQuestion(ctx context.Context, q Question, text string) (string, error) {
	answerTool := go_llm.NewFunction(
		"answer",
		"The answer from the given text that answers the question.",
		func(ctx context.Context, args struct {
			Answer string `description:"the answer to the question, the answer should come from the text"`
		}) (string, error) {
			return args.Answer, nil
		})

	noAnswerTool := go_llm.NewFunction(
		"no_answer",
		"Indicate that the text does not answer the question.",
		func(ctx context.Context, args struct{}) (string, error) {
			return "", nil
		})

	// Instruction first, then the text under evaluation, then the question.
	messages := []go_llm.Message{
		{
			Role: go_llm.RoleSystem,
			Text: "Evaluate the given text to see if it answers the question from the user. The text is as follows:",
		},
		{
			Role: go_llm.RoleSystem,
			Text: text,
		},
		{
			Role: go_llm.RoleUser,
			Text: q.Question,
		},
	}

	req := go_llm.Request{
		Messages: messages,
		Toolbox:  go_llm.NewToolBox(answerTool, noAnswerTool),
	}

	res, err := q.Model.ChatComplete(ctx, req)
	if err != nil {
		return "", err
	}

	// Nothing to execute: treat it the same as "no answer".
	if len(res.Choices) == 0 || len(res.Choices[0].Calls) == 0 {
		return "", nil
	}

	firstCall := res.Choices[0].Calls[0]
	return req.Toolbox.Execute(ctx, firstCall)
}
// functionSearch runs searchTerm through q.Search and walks the results in
// order, fetching each page and asking the model whether its body answers
// q.Question. The first non-empty answer is returned.
//
// Results with a blank or unparsable URL, pages that cannot be fetched, and
// pages without both a title and a body are skipped. An empty string with a
// nil error means no result produced an answer. Only a failure of the search
// itself is returned as an error.
func functionSearch(ctx context.Context, q Question, searchTerm string) (string, error) {
	res, err := q.Search.Search(ctx, searchTerm)
	if err != nil {
		return "", err
	}

	// first pass try to see if any provide the result without needing archive
	for _, r := range res {
		trimmed := strings.TrimSpace(r.URL)
		if trimmed == "" {
			// A result without a URL cannot be fetched.
			continue
		}

		u, err := url.Parse(trimmed)
		if err != nil {
			slog.Debug("skipping search result with unparsable url", "url", trimmed, "error", err)
			continue
		}

		a, err := extractArticle(ctx, u)
		if err != nil {
			slog.Debug("skipping search result that could not be fetched", "url", trimmed, "error", err)
			continue
		}

		if a.Title == "" || a.Body == "" {
			continue
		}

		answer, err := doesTextAnswerQuestion(ctx, q, a.Body)
		if err != nil {
			slog.Error("error checking if text answers question", "question", q.Question, "error", err)
			continue
		}

		if answer != "" {
			return answer, nil
		}
	}

	return "", nil
}
// functionThink asks q.Model to answer q.Question directly, without searching.
//
// The model is offered a single "answer" tool and is run with a temperature of
// 0.8. The first tool call of the first choice is executed and its result
// returned. An empty string with a nil error is returned when the model
// produces no choices or no tool calls.
func functionThink(ctx context.Context, q Question) (string, error) {
	answerTool := go_llm.NewFunction(
		"answer",
		"Answer the question.",
		func(ctx context.Context, args struct {
			Answer string `description:"the answer to the question"`
		}) (string, error) {
			return args.Answer, nil
		})

	temperature := float32(0.8)

	messages := []go_llm.Message{
		{
			Role: go_llm.RoleSystem,
			Text: "Answer the given question as accurately and concisely as possible using the answer function.",
		},
		{
			Role: go_llm.RoleUser,
			Text: q.Question,
		},
	}

	req := go_llm.Request{
		Messages:    messages,
		Toolbox:     go_llm.NewToolBox(answerTool),
		Temperature: &temperature,
	}

	res, err := q.Model.ChatComplete(ctx, req)
	if err != nil {
		return "", err
	}

	// Nothing to execute: report "no answer" rather than an error.
	if len(res.Choices) == 0 || len(res.Choices[0].Calls) == 0 {
		return "", nil
	}

	firstCall := res.Choices[0].Calls[0]
	return req.Toolbox.Execute(ctx, firstCall)
}
// Answer asks q.Model to answer q.Question, offering it up to three tools:
//
//   - "answer": return an answer the model already knows.
//   - "search": search the web via functionSearch (only offered when
//     o.MaxSearches > 0; the limit is quoted in the tool description).
//   - "think": answer a sub-question via functionThink (only offered when
//     o.MaxThinks > 0).
//
// Every tool call in the model's response choices is executed, and each
// non-empty result becomes one entry in the returned Answers.
//
// It returns (nil, nil) when the model produces no choices or no call yields
// an answer. When no answer was produced and at least one tool call failed,
// the tool errors are returned joined together. An error from the model
// itself is returned as-is.
func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
	fnSearch := go_llm.NewFunction(
		"search",
		"Search the web for an answer to a question. You can call this function up to "+fmt.Sprint(o.MaxSearches)+" times.",
		func(ctx context.Context, args struct {
			SearchQuery string `description:"what to search the web for for this question"`
			Question    string `description:"what question(s) you are trying to answer with this search"`
		}) (string, error) {
			q2 := q
			q2.Question = args.Question
			return functionSearch(ctx, q2, args.SearchQuery)
		})

	fnThink := go_llm.NewFunction(
		"think",
		"Think about a question. This is useful for breaking down complex questions into smaller parts that are easier to answer.",
		func(ctx context.Context, args struct {
			Question string `description:"the question to think about"`
		}) (string, error) {
			q2 := q
			q2.Question = args.Question
			return functionThink(ctx, q2)
		})

	fnAnswer := go_llm.NewFunction(
		"answer",
		"You definitively answer a question, if you call this it means you know the answer and do not need to search for it or use any other function to find it",
		func(ctx context.Context, args struct {
			Answer string `description:"the answer to the question"`
		}) (string, error) {
			return args.Answer, nil
		})

	var funcs = []*go_llm.Function{fnAnswer}

	if o.MaxSearches > 0 {
		funcs = append(funcs, fnSearch)
	}

	if o.MaxThinks > 0 {
		funcs = append(funcs, fnThink)
	}

	var temp float32 = 0.8

	req := go_llm.Request{
		Messages: []go_llm.Message{
			{
				Role: go_llm.RoleSystem,
				Text: "You are being asked to answer a question. You must respond with a function. You can answer it if you know the answer, or if some functions exist you can use those to help you find the answer.",
			},
			{
				Role: go_llm.RoleUser,
				Text: q.Question,
			},
		},
		Toolbox:     go_llm.NewToolBox(funcs...),
		Temperature: &temp,
	}

	res, err := q.Model.ChatComplete(ctx, req)
	if err != nil {
		return nil, err
	}

	if len(res.Choices) == 0 {
		return nil, nil
	}

	// Cap the number of choices processed. The cap only applies when it is
	// positive; otherwise MaxSearches == 0 (searching disabled) would discard
	// every choice, including plain "answer" calls.
	if o.MaxSearches > 0 && len(res.Choices) > o.MaxSearches {
		res.Choices = res.Choices[:o.MaxSearches]
	}

	var (
		answers Answers
		errs    []error
	)

	for _, choice := range res.Choices {
		for _, r := range fanExecuteToolCalls(ctx, req.Toolbox, choice.Calls) {
			switch {
			case r.Error != nil:
				errs = append(errs, r.Error)
			case r.Result != "":
				// An empty result means the tool found nothing; skip it.
				answers = append(answers, r.Result)
			}
		}
	}

	// Tool failures only matter to the caller when nothing was answered.
	if len(answers) == 0 && len(errs) > 0 {
		return nil, errors.Join(errs...)
	}

	return answers, nil
}
// Answer answers q using DefaultOptions; it is shorthand for
// DefaultOptions.Answer(ctx, q).
func Answer(ctx context.Context, q Question) (Answers, error) {
	opts := DefaultOptions
	return opts.Answer(ctx, q)
}

15
pkg/cache/cache.go vendored Normal file
View File

@ -0,0 +1,15 @@
package cache
import "io"
// Cache is a string-keyed store whose values can be accessed as raw streams,
// strings, or JSON-encoded Go values. Every method reports failure through its
// returned error.
type Cache interface {
// Get writes the value stored under key to writer.
Get(key string, writer io.Writer) error
// GetString returns the value stored under key as a string.
GetString(key string) (string, error)
// GetJSON reads the value stored under key into value.
// NOTE(review): presumably value must be a pointer, as with encoding/json decoding — confirm.
GetJSON(key string, value any) error
// Set stores the contents read from value under key.
Set(key string, value io.Reader) error
// SetJSON stores value under key in JSON form.
SetJSON(key string, value any) error
// SetString stores the string value under key.
SetString(key string, value string) error
// Delete removes the entry stored under key.
Delete(key string) error
}

160
pkg/cache/directory.go vendored Normal file
View File

@ -0,0 +1,160 @@
package cache
import (
"bytes"
"context"
"encoding/json"
"io"
"os"
"path/filepath"
"sync"
"time"
)
// Directory is a Cache that stores every entry as a file named "<key>.json"
// inside BaseFolder. All methods that touch the filesystem are serialized by
// an internal mutex, so a Directory must be used through a pointer and never
// copied.
type Directory struct {
	// BaseFolder is the directory that holds the cache files.
	BaseFolder string
	// MaxLife is how long a file may go unmodified before Cleanup removes it.
	// AutoCleanupRoutine also uses it as the pause between sweeps.
	MaxLife time.Duration

	// lock serializes file access performed through the methods below.
	lock sync.Mutex
}

var _ Cache = &Directory{}

// GetPath returns the path of the file that backs key.
func (d *Directory) GetPath(key string) string {
	return filepath.Join(d.BaseFolder, key+".json")
}

// Cleanup walks BaseFolder and removes every ".json" file whose modification
// time is more than MaxLife in the past. It stops at, and returns, the first
// walk or removal error. The context argument is currently unused.
func (d *Directory) Cleanup(_ context.Context) error {
	d.lock.Lock()
	defer d.lock.Unlock()

	// go through the BaseFolder looking for any files that are older than MaxLife
	return filepath.Walk(d.BaseFolder, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		// ignore directories
		if info.IsDir() {
			return nil
		}

		// only files that end in .json
		if filepath.Ext(path) != ".json" {
			return nil
		}

		// if the file is older than MaxLife, delete it
		if time.Since(info.ModTime()) > d.MaxLife {
			return os.Remove(path)
		}

		return nil
	})
}

// AutoCleanupRoutine will continually loop and cleanup the directory, until the context is cancelled or an error occurs
// returns nil on context cancellation, or an error if one occurs during cleanup
func (d *Directory) AutoCleanupRoutine(ctx context.Context) error {
	for {
		select {
		case <-ctx.Done():
			return nil
		case <-time.After(d.MaxLife):
			err := d.Cleanup(ctx)
			if err != nil {
				return err
			}
		}
	}
}

// openFile opens the file backing key for reading.
func (d *Directory) openFile(key string) (*os.File, error) {
	path := d.GetPath(key)
	return os.Open(path)
}

// createFile opens the file backing key for writing, creating it (and any
// missing parent directories) if needed and truncating existing content.
func (d *Directory) createFile(key string) (*os.File, error) {
	path := d.GetPath(key)
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		return nil, err
	}
	return os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o644)
}

// Set replaces the entry for key with everything read from value.
func (d *Directory) Set(key string, value io.Reader) (err error) {
	d.lock.Lock()
	defer d.lock.Unlock()

	fp, err := d.createFile(key)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean lost data, so report it unless an earlier
		// error already explains the failure.
		if cerr := fp.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(fp, value)
	return err
}

// SetJSON replaces the entry for key with the JSON encoding of value. The
// value is encoded in memory first, so an encoding failure leaves any existing
// entry untouched.
func (d *Directory) SetJSON(key string, value any) error {
	var buf bytes.Buffer
	if err := json.NewEncoder(&buf).Encode(value); err != nil {
		return err
	}
	return d.Set(key, &buf)
}

// SetString replaces the entry for key with value.
func (d *Directory) SetString(key, value string) error {
	return d.Set(key, bytes.NewReader([]byte(value)))
}

// Get copies the entry stored under key to w. It returns the error from
// opening the file when the entry does not exist.
func (d *Directory) Get(key string, w io.Writer) error {
	d.lock.Lock()
	defer d.lock.Unlock()

	fp, err := d.openFile(key)
	if err != nil {
		return err
	}
	defer fp.Close()

	_, err = io.Copy(w, fp)
	return err
}

// GetJSON decodes the JSON entry stored under key into v, which must be a
// pointer as required by encoding/json.
func (d *Directory) GetJSON(key string, v any) error {
	d.lock.Lock()
	defer d.lock.Unlock()

	fp, err := d.openFile(key)
	if err != nil {
		return err
	}
	defer fp.Close()

	return json.NewDecoder(fp).Decode(v)
}

// GetString returns the entry stored under key as a string.
func (d *Directory) GetString(key string) (string, error) {
	var buf bytes.Buffer
	err := d.Get(key, &buf)
	return buf.String(), err
}

// Delete removes the file backing key.
func (d *Directory) Delete(key string) error {
	d.lock.Lock()
	defer d.lock.Unlock()

	return os.Remove(d.GetPath(key))
}

44
pkg/search/google.go Normal file
View File

@ -0,0 +1,44 @@
package search
import (
"context"
googlesearch "github.com/rocketlaunchr/google-search"
"sort"
)
// Google is a Search implementation backed by the
// github.com/rocketlaunchr/google-search package.
type Google struct {
}

var _ Search = Google{}

// Search runs search through googlesearch.Search with all options left at
// their zero values and returns the hits ordered by ascending rank. The
// returned slice is empty when there are no hits; a failure of the underlying
// search is returned unchanged.
func (Google) Search(ctx context.Context, search string) ([]Result, error) {
	res, err := googlesearch.Search(ctx, search, googlesearch.SearchOptions{
		CountryCode:    "",
		LanguageCode:   "",
		Limit:          0,
		Start:          0,
		UserAgent:      "",
		OverLimit:      false,
		ProxyAddr:      "",
		FollowNextPage: false,
	})

	if err != nil {
		return nil, err
	}

	// just in case, sort the res by rank, as the api does not mention it is sorted
	sort.Slice(res, func(i, j int) bool {
		return res[i].Rank < res[j].Rank
	})

	results := make([]Result, 0, len(res))
	for _, r := range res {
		results = append(results, Result{
			Title:       r.Title,
			URL:         r.URL,
			Description: r.Description,
		})
	}

	return results, nil
}

13
pkg/search/search.go Normal file
View File

@ -0,0 +1,13 @@
package search
import "context"
// Result is a single hit returned by a Search implementation.
type Result struct {
// Title is the title of the hit.
Title string
// URL is the address of the hit.
URL string
// Description is the descriptive text reported for the hit.
Description string
}
// Search is implemented by search providers that can look up a query and
// return a list of results.
type Search interface {
// Search looks up query and returns the matching results, or an error if the
// lookup fails.
Search(ctx context.Context, query string) ([]Result, error)
}