sync of changes

This commit is contained in:
2024-11-09 19:50:14 -05:00
parent cc7b03c614
commit a83d5f9822
9 changed files with 491 additions and 95 deletions

View File

@@ -2,14 +2,13 @@ package answer
import (
"answer/pkg/cache"
"answer/pkg/extractor"
"answer/pkg/search"
"context"
"errors"
"fmt"
go_llm "gitea.stevedudenhoeffer.com/steve/go-llm"
"io"
gollm "gitea.stevedudenhoeffer.com/steve/go-llm"
"log/slog"
"net/http"
"net/url"
"strings"
)
@@ -22,7 +21,7 @@ type Question struct {
// Question is the question to answer
Question string
Model go_llm.ChatCompletion
Model gollm.ChatCompletion
Search search.Search
@@ -58,11 +57,11 @@ type Result struct {
Error error
}
func fanExecuteToolCalls(ctx context.Context, toolBox *go_llm.ToolBox, calls []go_llm.ToolCall) []Result {
func fanExecuteToolCalls(ctx context.Context, toolBox *gollm.ToolBox, calls []gollm.ToolCall) []Result {
var results []Result
var resultsOutput = make(chan Result, len(calls))
fnCall := func(call go_llm.ToolCall) Result {
fnCall := func(call gollm.ToolCall) Result {
str, err := toolBox.Execute(ctx, call)
if err != nil {
return Result{
@@ -76,7 +75,7 @@ func fanExecuteToolCalls(ctx context.Context, toolBox *go_llm.ToolBox, calls []g
}
for _, call := range calls {
go func(call go_llm.ToolCall) {
go func(call gollm.ToolCall) {
resultsOutput <- fnCall(call)
}(call)
}
@@ -97,7 +96,7 @@ type article struct {
Body string
}
func extractArticle(ctx context.Context, u *url.URL) (res article, err error) {
func extractArticle(ctx context.Context, c cache.Cache, u *url.URL) (res article, err error) {
defer func() {
e := recover()
@@ -110,49 +109,38 @@ func extractArticle(ctx context.Context, u *url.URL) (res article, err error) {
}
}()
req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil)
extractors := extractor.MultiExtractor(
extractor.CacheExtractor{
Cache: c,
Tag: "goose",
Extractor: extractor.GooseExtractor{},
},
extractor.CacheExtractor{
Cache: c,
Tag: "playwright",
Extractor: extractor.PlaywrightExtractor{},
},
)
a, err := extractors.Extract(ctx, u.String())
if err != nil {
return res, fmt.Errorf("error creating request: %w", err)
return article{
URL: "",
Title: "",
Body: "",
}, err
}
resp, err := c.cl.Do(req)
if err != nil {
return res, fmt.Errorf("error getting response: %w", err)
}
defer func(Body io.ReadCloser) {
err := Body.Close()
if err != nil {
slog.Error("error closing body", "error", err)
}
}(resp.Body)
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return "", fmt.Errorf("bad response: %d: %s", resp.StatusCode, resp.Status)
}
b, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("error reading body: %w", err)
}
g := goose.New()
article, err := g.ExtractFromRawHTML(string(b), target)
if err != nil {
return "", fmt.Errorf("error extracting article: %w", err)
}
return article.CleanedText, nil
panic("not implemented")
return article{}, nil
return article{
URL: a.URL,
Title: a.Title,
Body: a.Body,
}, nil
}
func doesTextAnswerQuestion(ctx context.Context, q Question, text string) (string, error) {
fnAnswer := go_llm.NewFunction(
fnAnswer := gollm.NewFunction(
"answer",
"The answer from the given text that answers the question.",
func(ctx context.Context, args struct {
@@ -161,29 +149,29 @@ func doesTextAnswerQuestion(ctx context.Context, q Question, text string) (strin
return args.Answer, nil
})
fnNoAnswer := go_llm.NewFunction(
fnNoAnswer := gollm.NewFunction(
"no_answer",
"Indicate that the text does not answer the question.",
func(ctx context.Context, args struct{}) (string, error) {
return "", nil
})
req := go_llm.Request{
Messages: []go_llm.Message{
req := gollm.Request{
Messages: []gollm.Message{
{
Role: go_llm.RoleSystem,
Role: gollm.RoleSystem,
Text: "Evaluate the given text to see if it answers the question from the user. The text is as follows:",
},
{
Role: go_llm.RoleSystem,
Role: gollm.RoleSystem,
Text: text,
},
{
Role: go_llm.RoleUser,
Role: gollm.RoleUser,
Text: q.Question,
},
},
Toolbox: go_llm.NewToolBox(fnAnswer, fnNoAnswer),
Toolbox: gollm.NewToolBox(fnAnswer, fnNoAnswer),
}
res, err := q.Model.ChatComplete(ctx, req)
@@ -224,7 +212,7 @@ func functionSearch(ctx context.Context, q Question, searchTerm string) (string,
continue
}
a, err := extractArticle(ctx, u)
a, err := extractArticle(ctx, q.Cache, u)
if err != nil {
continue
@@ -248,7 +236,7 @@ func functionSearch(ctx context.Context, q Question, searchTerm string) (string,
}
func functionThink(ctx context.Context, q Question) (string, error) {
fnAnswer := go_llm.NewFunction(
fnAnswer := gollm.NewFunction(
"answer",
"Answer the question.",
func(ctx context.Context, args struct {
@@ -258,18 +246,18 @@ func functionThink(ctx context.Context, q Question) (string, error) {
})
var temp float32 = 0.8
req := go_llm.Request{
Messages: []go_llm.Message{
req := gollm.Request{
Messages: []gollm.Message{
{
Role: go_llm.RoleSystem,
Role: gollm.RoleSystem,
Text: "Answer the given question as accurately and concisely as possible using the answer function.",
},
{
Role: go_llm.RoleUser,
Role: gollm.RoleUser,
Text: q.Question,
},
},
Toolbox: go_llm.NewToolBox(fnAnswer),
Toolbox: gollm.NewToolBox(fnAnswer),
Temperature: &temp,
}
@@ -291,7 +279,7 @@ func functionThink(ctx context.Context, q Question) (string, error) {
}
func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
fnSearch := go_llm.NewFunction(
fnSearch := gollm.NewFunction(
"search",
"Search the web for an answer to a question. You can call this function up to "+fmt.Sprint(o.MaxSearches)+" times.",
func(ctx context.Context, args struct {
@@ -304,7 +292,7 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
return functionSearch(ctx, q2, args.SearchQuery)
})
fnThink := go_llm.NewFunction(
fnThink := gollm.NewFunction(
"think",
"Think about a question. This is useful for breaking down complex questions into smaller parts that are easier to answer.",
func(ctx context.Context, args struct {
@@ -316,7 +304,7 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
return functionThink(ctx, q2)
})
fnAnswer := go_llm.NewFunction(
fnAnswer := gollm.NewFunction(
"answer",
"You definitively answer a question, if you call this it means you know the answer and do not need to search for it or use any other function to find it",
func(ctx context.Context, args struct {
@@ -325,7 +313,7 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
return args.Answer, nil
})
var funcs = []*go_llm.Function{fnAnswer}
var funcs = []*gollm.Function{fnAnswer}
if o.MaxSearches > 0 {
funcs = append(funcs, fnSearch)
@@ -337,18 +325,18 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
var temp float32 = 0.8
req := go_llm.Request{
Messages: []go_llm.Message{
req := gollm.Request{
Messages: []gollm.Message{
{
Role: go_llm.RoleSystem,
Role: gollm.RoleSystem,
Text: "You are being asked to answer a question. You must respond with a function. You can answer it if you know the answer, or if some functions exist you can use those to help you find the answer.",
},
{
Role: go_llm.RoleUser,
Role: gollm.RoleUser,
Text: q.Question,
},
},
Toolbox: go_llm.NewToolBox(funcs...),
Toolbox: gollm.NewToolBox(funcs...),
Temperature: &temp,
}
@@ -366,29 +354,29 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
res.Choices = res.Choices[:o.MaxSearches]
}
var answers []QuestionAnswer
choicesOutput := make(chan QuestionAnswer, len(res.Choices))
var answers Answers
choicesOutput := make(chan string, len(res.Choices))
for _, choice := range res.Choices {
fnChoice := func(choice go_llm.ResponseChoice) QuestionAnswer {
var calls []CallResult
var callsOutput = make(chan CallResult, len(choice.Calls))
fnCall := func(call go_llm.ToolCall) CallResult {
fnChoice := func(choice gollm.ResponseChoice) string {
var calls []Result
var callsOutput = make(chan Result, len(choice.Calls))
fnCall := func(call gollm.ToolCall) Result {
str, err := req.Toolbox.Execute(ctx, call)
if err != nil {
return CallResult{
return Result{
Error: err,
}
}
return CallResult{
return Result{
Result: str,
}
}
for _, call := range choice.Calls {
go func(call go_llm.ToolCall) {
go func(call gollm.ToolCall) {
callsOutput <- fnCall(call)
}(call)
}
@@ -402,8 +390,12 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
}
answers = append(answers, fnChoice(choice))
}
return answers, nil
}
func Answer(ctx context.Context, q Question) (Answers, error) {

51
pkg/cache/cache.go vendored
View File

@@ -1,6 +1,16 @@
package cache
import "io"
import (
"crypto/sha256"
"errors"
"fmt"
"io"
)
// ErrNotFound is the sentinel error reported when a requested key does not
// exist in the cache. Callers should test for it with errors.Is.
var ErrNotFound = errors.New("key not found")
type Cache interface {
Get(key string, writer io.Writer) error
@@ -13,3 +23,42 @@ type Cache interface {
Delete(key string) error
}
// ShaWrapper is a Cache decorator: each of its methods hashes the caller's key
// with SHA-256 and forwards the call to the wrapped Cache using the hex digest
// as the key.
type ShaWrapper struct {
// Cache is the underlying cache that receives the hashed keys.
Cache Cache
}
// hash returns the lowercase hexadecimal SHA-256 digest of key. The digest is
// what the wrapped cache sees in place of the caller's key.
func (s ShaWrapper) hash(key string) string {
	return fmt.Sprintf("%x", sha256.Sum256([]byte(key)))
}
// Get forwards the lookup to the wrapped cache under the hashed form of key,
// passing writer through unchanged, and returns whatever error the wrapped
// cache reports.
func (s ShaWrapper) Get(key string, writer io.Writer) error {
	hashed := s.hash(key)
	return s.Cache.Get(hashed, writer)
}
// GetString asks the wrapped cache for the string stored under the hashed
// form of key and returns its result and error unchanged.
func (s ShaWrapper) GetString(key string) (string, error) {
	hashed := s.hash(key)
	return s.Cache.GetString(hashed)
}
// GetJSON forwards to the wrapped cache's GetJSON under the hashed form of
// key, handing value through untouched, and returns the wrapped cache's error.
func (s ShaWrapper) GetJSON(key string, value any) error {
	hashed := s.hash(key)
	return s.Cache.GetJSON(hashed, value)
}
// Set hands value to the wrapped cache's Set under the hashed form of key and
// returns the wrapped cache's error.
func (s ShaWrapper) Set(key string, value io.Reader) error {
	hashed := s.hash(key)
	return s.Cache.Set(hashed, value)
}
// SetJSON hands value to the wrapped cache's SetJSON under the hashed form of
// key and returns the wrapped cache's error.
func (s ShaWrapper) SetJSON(key string, value any) error {
	hashed := s.hash(key)
	return s.Cache.SetJSON(hashed, value)
}
// SetString hands value to the wrapped cache's SetString under the hashed
// form of key and returns the wrapped cache's error.
func (s ShaWrapper) SetString(key string, value string) error {
	hashed := s.hash(key)
	return s.Cache.SetString(hashed, value)
}
// Delete asks the wrapped cache to delete the entry stored under the hashed
// form of key and returns the wrapped cache's error.
func (s ShaWrapper) Delete(key string) error {
	hashed := s.hash(key)
	return s.Cache.Delete(hashed)
}

View File

@@ -14,8 +14,7 @@ import (
// Directory is the Cache implementation asserted by `var _ Cache = &Directory{}`.
// Its methods use pointer receivers, so a Directory (which embeds a mutex)
// must not be copied after first use.
type Directory struct {
	// BaseFolder is presumably the directory under which entries are stored;
	// confirm against GetPath.
	BaseFolder string

	// MaxLife is presumably the maximum age of an entry before
	// AutoCleanupRoutine removes it; confirm against that routine.
	MaxLife time.Duration

	// lock is declared exactly once: a second `lock sync.Mutex` line was a
	// leftover of a whitespace-only change and made the struct fail to compile.
	// NOTE(review): assumed to serialize access to the backing files; confirm.
	lock sync.Mutex
}
var _ Cache = &Directory{}
@@ -76,7 +75,16 @@ func (d *Directory) AutoCleanupRoutine(ctx context.Context) error {
// openFile opens, read-only, the file at the path GetPath returns for key.
//
// A missing file is reported as ErrNotFound so callers can detect a cache miss
// with errors.Is; any other failure from os.Open is returned unchanged. On
// success the caller owns the returned *os.File and must close it.
//
// The earlier unconditional `return os.Open(path)` at the top of the function
// has been removed: it made the ErrNotFound translation below unreachable.
func (d *Directory) openFile(key string) (*os.File, error) {
	f, err := os.Open(d.GetPath(key))
	if err != nil {
		if os.IsNotExist(err) {
			return nil, ErrNotFound
		}
		return nil, err
	}

	return f, nil
}
func (d *Directory) Set(key string, value io.Reader) error {

View File

@@ -0,0 +1,87 @@
package extractor
import (
"answer/pkg/cache"
"context"
"errors"
)
// ErrFailedToExtract is the sentinel error meaning that no content could be
// extracted. multiExtractor.Extract skips any extractor error that matches it
// (via errors.Is) and returns it when no extractor succeeded and no other
// error was collected.
var ErrFailedToExtract = errors.New("failed to extract")
// Article is the result of extracting the readable content of a web page.
type Article struct {
// URL is the address the content was extracted from.
URL string
// Title is the title the extractor reported for the page.
Title string
// Body is the extracted text of the article.
Body string
}
// Extractor is the interface implemented by systems that can extract the
// contents of a URL as an Article.
type Extractor interface {
// Extract returns the Article extracted from url, or an error.
Extract(ctx context.Context, url string) (Article, error)
}
// multiExtractor is an Extractor that holds an ordered list of other
// extractors and tries them one after another.
type multiExtractor struct {
// extractors are tried by Extract in slice order.
extractors []Extractor
}
// Compile-time check that multiExtractor satisfies Extractor.
var _ Extractor = multiExtractor{}
// Extract tries each configured extractor in order and returns the first
// Article produced without error.
//
// An extractor error matching ErrFailedToExtract is skipped silently; any
// other error is remembered. If no extractor succeeds, the remembered errors
// are returned joined together, or ErrFailedToExtract if there were none.
//
// ctx is checked before each extractor is started. Once it is done, no further
// extractor is launched and the context error is returned, joined with any
// errors collected so far, so errors.Is(err, context.Canceled) still matches.
// This matters because an extractor may ignore ctx itself and be expensive to
// start.
func (m multiExtractor) Extract(ctx context.Context, url string) (Article, error) {
	var errs []error

	for _, e := range m.extractors {
		if err := ctx.Err(); err != nil {
			return Article{}, errors.Join(append(errs, err)...)
		}

		article, err := e.Extract(ctx, url)
		if err == nil {
			return article, nil
		}

		// "Could not extract" is an expected outcome, not a failure worth
		// reporting; just move on to the next extractor.
		if errors.Is(err, ErrFailedToExtract) {
			continue
		}

		errs = append(errs, err)
	}

	if len(errs) > 0 {
		return Article{}, errors.Join(errs...)
	}

	return Article{}, ErrFailedToExtract
}
// MultiExtractor combines the given extractors into a single Extractor that
// tries them in the order supplied.
func MultiExtractor(e ...Extractor) Extractor {
	var combined multiExtractor
	combined.extractors = e
	return combined
}
// CacheExtractor is an Extractor that consults Cache before delegating to the
// wrapped Extractor, and stores what the wrapped Extractor returns.
type CacheExtractor struct {
// Cache holds previously extracted articles as JSON.
Cache cache.Cache
// Tag is the prefix of the cache key ("<Tag>:<url>"); when empty, Extract
// falls back to "defaultextractor:".
Tag string
// Extractor performs the real extraction on a cache miss.
Extractor Extractor
}
// Compile-time check that CacheExtractor satisfies Extractor.
var _ Extractor = CacheExtractor{}
// Extract returns the Article for url, serving it from the cache when possible.
//
// The cache key is "<Tag>:<url>". An empty Tag falls back to
// "defaultextractor:", which yields a double colon in the key; that is kept
// as-is so entries written by earlier versions remain reachable.
//
// Any error from the cache read is treated as a miss and the wrapped
// Extractor is called. Its error, if any, is returned unchanged and nothing is
// cached. On success the article is written back to the cache on a
// best-effort basis: a failed cache write no longer discards a successfully
// extracted article.
func (c CacheExtractor) Extract(ctx context.Context, url string) (Article, error) {
	tag := c.Tag
	if tag == "" {
		tag = "defaultextractor:"
	}
	key := tag + ":" + url

	var cached Article
	if err := c.Cache.GetJSON(key, &cached); err == nil {
		return cached, nil
	}

	article, err := c.Extractor.Extract(ctx, url)
	if err != nil {
		return Article{}, err
	}

	// Caching is an optimisation only; the extraction itself succeeded, so a
	// write failure is deliberately ignored rather than turned into an error.
	_ = c.Cache.SetJSON(key, article)

	return article, nil
}

25
pkg/extractor/goose.go Normal file
View File

@@ -0,0 +1,25 @@
package extractor
import (
"context"
goose "github.com/advancedlogic/GoOse"
)
// GooseExtractor extracts articles using the GoOse library. It has no fields,
// so the zero value is ready to use.
type GooseExtractor struct {
}
// Extract asks GoOse to fetch and parse url, and maps the parsed title and
// cleaned text onto an Article.
//
// ctx is accepted to satisfy the Extractor interface but is not used here;
// the GoOse call made below does not take a context. When GoOse returns an
// error, that error is returned together with an Article carrying only the
// URL.
func (GooseExtractor) Extract(ctx context.Context, url string) (Article, error) {
	g := goose.New()

	parsed, err := g.ExtractFromURL(url)
	if err != nil {
		return Article{URL: url}, err
	}

	return Article{
		URL:   url,
		Title: parsed.Title,
		Body:  parsed.CleanedText,
	}, nil
}

View File

@@ -0,0 +1,81 @@
package extractor
import (
"context"
"fmt"
"github.com/playwright-community/playwright-go"
"os"
)
// PlaywrightExtractor extracts articles by loading the page in a
// Playwright-driven Chromium browser and running Readability.js on it. It has
// no fields, so the zero value is ready to use.
type PlaywrightExtractor struct {
}
// Compile-time check that PlaywrightExtractor satisfies Extractor.
var _ Extractor = PlaywrightExtractor{}
// getReadabilityJS returns the contents of the file "readability.js". The
// path is relative, so it is resolved against the process's current working
// directory. On any read error it returns an empty string and that error.
func getReadabilityJS() (script string, err error) {
	var raw []byte
	if raw, err = os.ReadFile("readability.js"); err == nil {
		script = string(raw)
	}
	return script, err
}
// Extract loads url in a headless Chromium instance, injects Readability.js
// into the page and returns the text content and title it finds.
//
// The context parameter is ignored. Every call starts its own Playwright
// driver, browser and page, and tears them down again before returning.
//
// On any failure the error is returned together with an Article that carries
// only the URL. If the Readability script yields something other than a
// string (it returns null when it cannot parse the page), the error is
// "failed to convert content to string".
func (p PlaywrightExtractor) Extract(_ context.Context, url string) (Article, error) {
	var article = Article{
		URL: url,
	}

	// Load the script before touching Playwright: if readability.js cannot be
	// read there is no point in paying for a browser launch and a page fetch.
	readabilityJS, err := getReadabilityJS()
	if err != nil {
		return article, err
	}

	pw, err := playwright.Run()
	if err != nil {
		return article, err
	}
	// Teardown errors are ignored throughout: nothing useful can be done with
	// them once the extraction result has been decided.
	defer pw.Stop()

	browser, err := pw.Chromium.Launch()
	if err != nil {
		return article, err
	}
	defer browser.Close()

	page, err := browser.NewPage()
	if err != nil {
		return article, err
	}
	defer page.Close()

	_, err = page.Goto(url)
	if err != nil {
		return article, err
	}

	// Inject Readability.js
	_, err = page.AddScriptTag(playwright.PageAddScriptTagOptions{
		Content: &readabilityJS,
	})
	if err != nil {
		return article, err
	}

	// Run Readability and get the article content
	content, err := page.Evaluate(`() => {
		let article = new Readability(document).parse();
		return article ? article.textContent : null;
	}`)
	if err != nil {
		return article, err
	}

	text, ok := content.(string)
	if !ok {
		return article, fmt.Errorf("failed to convert content to string")
	}

	article.Body = text
	// The title is optional; a failure to read it leaves Title empty rather
	// than failing an otherwise successful extraction.
	article.Title, _ = page.Title()

	return article, nil
}

View File

@@ -1,18 +1,30 @@
package search
import (
"answer/pkg/cache"
"context"
googlesearch "github.com/rocketlaunchr/google-search"
"sort"
)
// Google is a Search implementation backed by Google search results.
type Google struct {
// Cache stores result lists as JSON under the key "google:<query>"; Search
// consults it before issuing a request.
Cache cache.Cache
}
// Compile-time check that Google satisfies Search.
var _ Search = Google{}
func (Google) Search(ctx context.Context, search string) ([]Result, error) {
res, err := googlesearch.Search(ctx, search, googlesearch.SearchOptions{
func (g Google) Search(ctx context.Context, search string) ([]Result, error) {
var res []Result
key := "google:" + search
err := g.Cache.GetJSON(key, &res)
if err == nil {
return res, nil
}
results, err := googlesearch.Search(ctx, search, googlesearch.SearchOptions{
CountryCode: "",
LanguageCode: "",
Limit: 0,
@@ -27,18 +39,20 @@ func (Google) Search(ctx context.Context, search string) ([]Result, error) {
return nil, err
}
var results []Result
// just in case, sort the res by rank, as the api does not mention it is sorted
sort.Slice(res, func(i, j int) bool {
return res[i].Rank < res[j].Rank
return results[i].Rank < results[j].Rank
})
for _, r := range res {
results = append(results, Result{
for _, r := range results {
res = append(res, Result{
Title: r.Title,
URL: r.URL,
Description: r.Description,
})
}
_ = g.Cache.SetJSON(key, res)
return res, nil
}