sync of changes
This commit is contained in:
parent
cc7b03c614
commit
a83d5f9822
131
cmd/answer.go
131
cmd/answer.go
@ -1,15 +1,33 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"answer/pkg/answer"
|
||||
"answer/pkg/cache"
|
||||
"answer/pkg/search"
|
||||
"context"
|
||||
gollm "gitea.stevedudenhoeffer.com/steve/go-llm"
|
||||
"github.com/urfave/cli"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// getKey resolves the API key to use. An explicitly supplied key always wins;
// only when key is empty is the environment variable named by env consulted.
func getKey(key string, env string) string {
	if key == "" {
		return os.Getenv(env)
	}

	return key
}
|
||||
|
||||
func main() {
|
||||
ctx := context.Background()
|
||||
// Usage: go run cmd/answer.go question...
|
||||
// - flags:
|
||||
// --model=[model string such as openai/gpt-4o, anthropic/claude..., google/gemini-1.5. Default: openai/gpt-4o]
|
||||
// --search-provider=[search provider string such as google, duckduckgo. Default: google]
|
||||
// --cache-provider=[cache provider string such as memory, redis, file. Default: memory]
|
||||
|
||||
var app = cli.App{
|
||||
Name: "answer",
|
||||
@ -17,20 +35,123 @@ func main() {
|
||||
Version: "0.1",
|
||||
Description: "",
|
||||
|
||||
Flags: []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "model",
|
||||
Value: "openai/gpt-4o",
|
||||
Usage: "model to use for answering the question, syntax: provider/model such as openai/gpt-4o",
|
||||
},
|
||||
|
||||
&cli.StringFlag{
|
||||
Name: "llm-key",
|
||||
Value: "",
|
||||
Usage: "key for the llm model (if empty, will use env var of PROVIDER_API_KEY, such as OPENAI_API_KEY)",
|
||||
},
|
||||
|
||||
&cli.StringFlag{
|
||||
Name: "search-provider",
|
||||
Value: "google",
|
||||
Usage: "search provider to use for searching the web",
|
||||
},
|
||||
|
||||
&cli.StringFlag{
|
||||
Name: "cache-provider",
|
||||
Value: "memory",
|
||||
Usage: "cache provider to use for caching search results",
|
||||
},
|
||||
},
|
||||
|
||||
Action: func(c *cli.Context) error {
|
||||
// if there is no question to answer, print usage
|
||||
if c.NArg() == 0 {
|
||||
return cli.ShowAppHelp(c)
|
||||
}
|
||||
var question answer.Question
|
||||
|
||||
// get the question
|
||||
fmt.Println("Head: ", c.Args().First())
|
||||
fmt.Println("Tail: ", c.Args().Tail())
|
||||
question.Question = strings.Join(c.Args(), " ")
|
||||
|
||||
switch c.String("cache-provider") {
|
||||
case "memory":
|
||||
panic("not implemented")
|
||||
|
||||
case "redis":
|
||||
panic("not implemented")
|
||||
|
||||
case "file":
|
||||
question.Cache = &cache.Directory{
|
||||
BaseFolder: "cache",
|
||||
MaxLife: 1 * 24 * time.Hour,
|
||||
}
|
||||
|
||||
default:
|
||||
panic("unknown cache provider")
|
||||
}
|
||||
|
||||
if question.Cache == nil {
|
||||
panic("cache is nil")
|
||||
}
|
||||
// wrap the cache in a hasher
|
||||
question.Cache = cache.ShaWrapper{
|
||||
Cache: question.Cache,
|
||||
}
|
||||
|
||||
switch c.String("search-provider") {
|
||||
case "google":
|
||||
question.Search = search.Google{Cache: question.Cache}
|
||||
|
||||
default:
|
||||
panic("unknown search provider")
|
||||
}
|
||||
|
||||
var llm gollm.LLM
|
||||
|
||||
model := c.String("model")
|
||||
|
||||
a := strings.Split(model, "/")
|
||||
|
||||
if len(a) != 2 {
|
||||
panic("invalid model, expected: provider/model (such as openai/gpt-4o)")
|
||||
}
|
||||
|
||||
switch a[0] {
|
||||
case "openai":
|
||||
llm = gollm.OpenAI(getKey(c.String("llm-key"), "OPENAI_API_KEY"))
|
||||
|
||||
case "anthropic":
|
||||
llm = gollm.Anthropic(getKey(c.String("llm-key"), "ANTHROPI_API_KEY"))
|
||||
|
||||
case "google":
|
||||
llm = gollm.Google(getKey(c.String("llm-key"), "GOOGLE_API_KEY"))
|
||||
|
||||
default:
|
||||
panic("unknown model provider")
|
||||
}
|
||||
|
||||
m, err := llm.ModelVersion(a[1])
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
question.Model = m
|
||||
|
||||
answers, err := answer.Answer(ctx, question)
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for i, a := range answers {
|
||||
slog.Info("answer", "index", i, "answer", a)
|
||||
}
|
||||
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
app.Run()
|
||||
err := app.Run(os.Args)
|
||||
|
||||
if err != nil {
|
||||
slog.Error("Error: ", err)
|
||||
}
|
||||
}
|
||||
|
25
go.mod
25
go.mod
@ -4,6 +4,14 @@ go 1.23.2
|
||||
|
||||
replace gitea.stevedudenhoeffer.com/steve/go-llm => ../go-llm
|
||||
|
||||
require (
|
||||
gitea.stevedudenhoeffer.com/steve/go-llm v0.0.0-20241031152103-f603010dee49
|
||||
github.com/advancedlogic/GoOse v0.0.0-20231203033844-ae6b36caf275
|
||||
github.com/playwright-community/playwright-go v0.4702.0
|
||||
github.com/rocketlaunchr/google-search v1.1.6
|
||||
github.com/urfave/cli v1.22.16
|
||||
)
|
||||
|
||||
require (
|
||||
cloud.google.com/go v0.115.0 // indirect
|
||||
cloud.google.com/go/ai v0.8.0 // indirect
|
||||
@ -11,16 +19,22 @@ require (
|
||||
cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
|
||||
cloud.google.com/go/compute/metadata v0.3.0 // indirect
|
||||
cloud.google.com/go/longrunning v0.5.7 // indirect
|
||||
gitea.stevedudenhoeffer.com/steve/go-llm v0.0.0-20241031152103-f603010dee49 // indirect
|
||||
github.com/PuerkitoBio/goquery v1.8.1 // indirect
|
||||
github.com/andybalholm/cascadia v1.3.2 // indirect
|
||||
github.com/antchfx/htmlquery v1.3.0 // indirect
|
||||
github.com/antchfx/xmlquery v1.3.15 // indirect
|
||||
github.com/antchfx/xpath v1.2.4 // indirect
|
||||
github.com/araddon/dateparse v0.0.0-20180729174819-cfd92a431d0e // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
|
||||
github.com/deckarep/golang-set/v2 v2.6.0 // indirect
|
||||
github.com/fatih/set v0.2.1 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/gigawattio/window v0.0.0-20180317192513-0f5467e35573 // indirect
|
||||
github.com/go-jose/go-jose/v3 v3.0.3 // indirect
|
||||
github.com/go-logr/logr v1.4.1 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/go-resty/resty/v2 v2.0.0 // indirect
|
||||
github.com/go-stack/stack v1.8.1 // indirect
|
||||
github.com/gobwas/glob v0.2.3 // indirect
|
||||
github.com/gocolly/colly/v2 v2.1.0 // indirect
|
||||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
|
||||
@ -30,21 +44,26 @@ require (
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
|
||||
github.com/googleapis/gax-go/v2 v2.12.5 // indirect
|
||||
github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0 // indirect
|
||||
github.com/kennygrant/sanitize v1.2.4 // indirect
|
||||
github.com/liushuangls/go-anthropic/v2 v2.8.0 // indirect
|
||||
github.com/rocketlaunchr/google-search v1.1.6 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.3 // indirect
|
||||
github.com/olekukonko/tablewriter v0.0.0-20180506121414-d4647c9c7a84 // indirect
|
||||
github.com/pkg/errors v0.8.1 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect
|
||||
github.com/sashabaranov/go-openai v1.31.0 // indirect
|
||||
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
|
||||
github.com/temoto/robotstxt v1.1.2 // indirect
|
||||
github.com/urfave/cli v1.22.16 // indirect
|
||||
go.opencensus.io v0.24.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 // indirect
|
||||
go.opentelemetry.io/otel v1.26.0 // indirect
|
||||
go.opentelemetry.io/otel/metric v1.26.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.26.0 // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
golang.org/x/crypto v0.24.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect
|
||||
golang.org/x/net v0.26.0 // indirect
|
||||
golang.org/x/oauth2 v0.21.0 // indirect
|
||||
golang.org/x/sync v0.9.0 // indirect
|
||||
|
@ -2,14 +2,13 @@ package answer
|
||||
|
||||
import (
|
||||
"answer/pkg/cache"
|
||||
"answer/pkg/extractor"
|
||||
"answer/pkg/search"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
go_llm "gitea.stevedudenhoeffer.com/steve/go-llm"
|
||||
"io"
|
||||
gollm "gitea.stevedudenhoeffer.com/steve/go-llm"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
@ -22,7 +21,7 @@ type Question struct {
|
||||
// Question is the question to answer
|
||||
Question string
|
||||
|
||||
Model go_llm.ChatCompletion
|
||||
Model gollm.ChatCompletion
|
||||
|
||||
Search search.Search
|
||||
|
||||
@ -58,11 +57,11 @@ type Result struct {
|
||||
Error error
|
||||
}
|
||||
|
||||
func fanExecuteToolCalls(ctx context.Context, toolBox *go_llm.ToolBox, calls []go_llm.ToolCall) []Result {
|
||||
func fanExecuteToolCalls(ctx context.Context, toolBox *gollm.ToolBox, calls []gollm.ToolCall) []Result {
|
||||
var results []Result
|
||||
var resultsOutput = make(chan Result, len(calls))
|
||||
|
||||
fnCall := func(call go_llm.ToolCall) Result {
|
||||
fnCall := func(call gollm.ToolCall) Result {
|
||||
str, err := toolBox.Execute(ctx, call)
|
||||
if err != nil {
|
||||
return Result{
|
||||
@ -76,7 +75,7 @@ func fanExecuteToolCalls(ctx context.Context, toolBox *go_llm.ToolBox, calls []g
|
||||
}
|
||||
|
||||
for _, call := range calls {
|
||||
go func(call go_llm.ToolCall) {
|
||||
go func(call gollm.ToolCall) {
|
||||
resultsOutput <- fnCall(call)
|
||||
}(call)
|
||||
}
|
||||
@ -97,7 +96,7 @@ type article struct {
|
||||
Body string
|
||||
}
|
||||
|
||||
func extractArticle(ctx context.Context, u *url.URL) (res article, err error) {
|
||||
func extractArticle(ctx context.Context, c cache.Cache, u *url.URL) (res article, err error) {
|
||||
defer func() {
|
||||
e := recover()
|
||||
|
||||
@ -110,49 +109,38 @@ func extractArticle(ctx context.Context, u *url.URL) (res article, err error) {
|
||||
}
|
||||
}()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil)
|
||||
extractors := extractor.MultiExtractor(
|
||||
extractor.CacheExtractor{
|
||||
Cache: c,
|
||||
Tag: "goose",
|
||||
Extractor: extractor.GooseExtractor{},
|
||||
},
|
||||
extractor.CacheExtractor{
|
||||
Cache: c,
|
||||
Tag: "playwright",
|
||||
Extractor: extractor.PlaywrightExtractor{},
|
||||
},
|
||||
)
|
||||
|
||||
a, err := extractors.Extract(ctx, u.String())
|
||||
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("error creating request: %w", err)
|
||||
return article{
|
||||
URL: "",
|
||||
Title: "",
|
||||
Body: "",
|
||||
}, err
|
||||
}
|
||||
|
||||
resp, err := c.cl.Do(req)
|
||||
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("error getting response: %w", err)
|
||||
}
|
||||
|
||||
defer func(Body io.ReadCloser) {
|
||||
err := Body.Close()
|
||||
if err != nil {
|
||||
slog.Error("error closing body", "error", err)
|
||||
}
|
||||
}(resp.Body)
|
||||
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return "", fmt.Errorf("bad response: %d: %s", resp.StatusCode, resp.Status)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error reading body: %w", err)
|
||||
}
|
||||
|
||||
g := goose.New()
|
||||
article, err := g.ExtractFromRawHTML(string(b), target)
|
||||
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error extracting article: %w", err)
|
||||
}
|
||||
|
||||
return article.CleanedText, nil
|
||||
panic("not implemented")
|
||||
return article{}, nil
|
||||
return article{
|
||||
URL: a.URL,
|
||||
Title: a.Title,
|
||||
Body: a.Body,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func doesTextAnswerQuestion(ctx context.Context, q Question, text string) (string, error) {
|
||||
fnAnswer := go_llm.NewFunction(
|
||||
fnAnswer := gollm.NewFunction(
|
||||
"answer",
|
||||
"The answer from the given text that answers the question.",
|
||||
func(ctx context.Context, args struct {
|
||||
@ -161,29 +149,29 @@ func doesTextAnswerQuestion(ctx context.Context, q Question, text string) (strin
|
||||
return args.Answer, nil
|
||||
})
|
||||
|
||||
fnNoAnswer := go_llm.NewFunction(
|
||||
fnNoAnswer := gollm.NewFunction(
|
||||
"no_answer",
|
||||
"Indicate that the text does not answer the question.",
|
||||
func(ctx context.Context, args struct{}) (string, error) {
|
||||
return "", nil
|
||||
})
|
||||
|
||||
req := go_llm.Request{
|
||||
Messages: []go_llm.Message{
|
||||
req := gollm.Request{
|
||||
Messages: []gollm.Message{
|
||||
{
|
||||
Role: go_llm.RoleSystem,
|
||||
Role: gollm.RoleSystem,
|
||||
Text: "Evaluate the given text to see if it answers the question from the user. The text is as follows:",
|
||||
},
|
||||
{
|
||||
Role: go_llm.RoleSystem,
|
||||
Role: gollm.RoleSystem,
|
||||
Text: text,
|
||||
},
|
||||
{
|
||||
Role: go_llm.RoleUser,
|
||||
Role: gollm.RoleUser,
|
||||
Text: q.Question,
|
||||
},
|
||||
},
|
||||
Toolbox: go_llm.NewToolBox(fnAnswer, fnNoAnswer),
|
||||
Toolbox: gollm.NewToolBox(fnAnswer, fnNoAnswer),
|
||||
}
|
||||
|
||||
res, err := q.Model.ChatComplete(ctx, req)
|
||||
@ -224,7 +212,7 @@ func functionSearch(ctx context.Context, q Question, searchTerm string) (string,
|
||||
continue
|
||||
}
|
||||
|
||||
a, err := extractArticle(ctx, u)
|
||||
a, err := extractArticle(ctx, q.Cache, u)
|
||||
|
||||
if err != nil {
|
||||
continue
|
||||
@ -248,7 +236,7 @@ func functionSearch(ctx context.Context, q Question, searchTerm string) (string,
|
||||
}
|
||||
|
||||
func functionThink(ctx context.Context, q Question) (string, error) {
|
||||
fnAnswer := go_llm.NewFunction(
|
||||
fnAnswer := gollm.NewFunction(
|
||||
"answer",
|
||||
"Answer the question.",
|
||||
func(ctx context.Context, args struct {
|
||||
@ -258,18 +246,18 @@ func functionThink(ctx context.Context, q Question) (string, error) {
|
||||
})
|
||||
|
||||
var temp float32 = 0.8
|
||||
req := go_llm.Request{
|
||||
Messages: []go_llm.Message{
|
||||
req := gollm.Request{
|
||||
Messages: []gollm.Message{
|
||||
{
|
||||
Role: go_llm.RoleSystem,
|
||||
Role: gollm.RoleSystem,
|
||||
Text: "Answer the given question as accurately and concisely as possible using the answer function.",
|
||||
},
|
||||
{
|
||||
Role: go_llm.RoleUser,
|
||||
Role: gollm.RoleUser,
|
||||
Text: q.Question,
|
||||
},
|
||||
},
|
||||
Toolbox: go_llm.NewToolBox(fnAnswer),
|
||||
Toolbox: gollm.NewToolBox(fnAnswer),
|
||||
Temperature: &temp,
|
||||
}
|
||||
|
||||
@ -291,7 +279,7 @@ func functionThink(ctx context.Context, q Question) (string, error) {
|
||||
}
|
||||
|
||||
func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
|
||||
fnSearch := go_llm.NewFunction(
|
||||
fnSearch := gollm.NewFunction(
|
||||
"search",
|
||||
"Search the web for an answer to a question. You can call this function up to "+fmt.Sprint(o.MaxSearches)+" times.",
|
||||
func(ctx context.Context, args struct {
|
||||
@ -304,7 +292,7 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
|
||||
return functionSearch(ctx, q2, args.SearchQuery)
|
||||
})
|
||||
|
||||
fnThink := go_llm.NewFunction(
|
||||
fnThink := gollm.NewFunction(
|
||||
"think",
|
||||
"Think about a question. This is useful for breaking down complex questions into smaller parts that are easier to answer.",
|
||||
func(ctx context.Context, args struct {
|
||||
@ -316,7 +304,7 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
|
||||
return functionThink(ctx, q2)
|
||||
})
|
||||
|
||||
fnAnswer := go_llm.NewFunction(
|
||||
fnAnswer := gollm.NewFunction(
|
||||
"answer",
|
||||
"You definitively answer a question, if you call this it means you know the answer and do not need to search for it or use any other function to find it",
|
||||
func(ctx context.Context, args struct {
|
||||
@ -325,7 +313,7 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
|
||||
return args.Answer, nil
|
||||
})
|
||||
|
||||
var funcs = []*go_llm.Function{fnAnswer}
|
||||
var funcs = []*gollm.Function{fnAnswer}
|
||||
|
||||
if o.MaxSearches > 0 {
|
||||
funcs = append(funcs, fnSearch)
|
||||
@ -337,18 +325,18 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
|
||||
|
||||
var temp float32 = 0.8
|
||||
|
||||
req := go_llm.Request{
|
||||
Messages: []go_llm.Message{
|
||||
req := gollm.Request{
|
||||
Messages: []gollm.Message{
|
||||
{
|
||||
Role: go_llm.RoleSystem,
|
||||
Role: gollm.RoleSystem,
|
||||
Text: "You are being asked to answer a question. You must respond with a function. You can answer it if you know the answer, or if some functions exist you can use those to help you find the answer.",
|
||||
},
|
||||
{
|
||||
Role: go_llm.RoleUser,
|
||||
Role: gollm.RoleUser,
|
||||
Text: q.Question,
|
||||
},
|
||||
},
|
||||
Toolbox: go_llm.NewToolBox(funcs...),
|
||||
Toolbox: gollm.NewToolBox(funcs...),
|
||||
Temperature: &temp,
|
||||
}
|
||||
|
||||
@ -366,29 +354,29 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
|
||||
res.Choices = res.Choices[:o.MaxSearches]
|
||||
}
|
||||
|
||||
var answers []QuestionAnswer
|
||||
choicesOutput := make(chan QuestionAnswer, len(res.Choices))
|
||||
var answers Answers
|
||||
choicesOutput := make(chan string, len(res.Choices))
|
||||
|
||||
for _, choice := range res.Choices {
|
||||
fnChoice := func(choice go_llm.ResponseChoice) QuestionAnswer {
|
||||
var calls []CallResult
|
||||
var callsOutput = make(chan CallResult, len(choice.Calls))
|
||||
fnCall := func(call go_llm.ToolCall) CallResult {
|
||||
fnChoice := func(choice gollm.ResponseChoice) string {
|
||||
var calls []Result
|
||||
var callsOutput = make(chan Result, len(choice.Calls))
|
||||
fnCall := func(call gollm.ToolCall) Result {
|
||||
str, err := req.Toolbox.Execute(ctx, call)
|
||||
|
||||
if err != nil {
|
||||
return CallResult{
|
||||
return Result{
|
||||
Error: err,
|
||||
}
|
||||
}
|
||||
|
||||
return CallResult{
|
||||
return Result{
|
||||
Result: str,
|
||||
}
|
||||
}
|
||||
|
||||
for _, call := range choice.Calls {
|
||||
go func(call go_llm.ToolCall) {
|
||||
go func(call gollm.ToolCall) {
|
||||
callsOutput <- fnCall(call)
|
||||
}(call)
|
||||
}
|
||||
@ -402,8 +390,12 @@ func (o Options) Answer(ctx context.Context, q Question) (Answers, error) {
|
||||
|
||||
}
|
||||
|
||||
answers = append(answers, fnChoice(choice))
|
||||
|
||||
}
|
||||
|
||||
return answers, nil
|
||||
|
||||
}
|
||||
|
||||
func Answer(ctx context.Context, q Question) (Answers, error) {
|
||||
|
51
pkg/cache/cache.go
vendored
51
pkg/cache/cache.go
vendored
@ -1,6 +1,16 @@
|
||||
package cache
|
||||
|
||||
import "io"
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrNotFound is returned when the key is not found in the cache
|
||||
ErrNotFound = errors.New("key not found")
|
||||
)
|
||||
|
||||
type Cache interface {
|
||||
Get(key string, writer io.Writer) error
|
||||
@ -13,3 +23,42 @@ type Cache interface {
|
||||
|
||||
Delete(key string) error
|
||||
}
|
||||
|
||||
type ShaWrapper struct {
|
||||
Cache Cache
|
||||
}
|
||||
|
||||
func (s ShaWrapper) hash(key string) string {
|
||||
// hash the key to a sha256
|
||||
hash := sha256.Sum256([]byte(key))
|
||||
|
||||
// return the hex representation of the hash
|
||||
return fmt.Sprintf("%x", hash)
|
||||
}
|
||||
func (s ShaWrapper) Get(key string, writer io.Writer) error {
|
||||
return s.Cache.Get(s.hash(key), writer)
|
||||
}
|
||||
|
||||
func (s ShaWrapper) GetString(key string) (string, error) {
|
||||
return s.Cache.GetString(s.hash(key))
|
||||
}
|
||||
|
||||
func (s ShaWrapper) GetJSON(key string, value any) error {
|
||||
return s.Cache.GetJSON(s.hash(key), value)
|
||||
}
|
||||
|
||||
func (s ShaWrapper) Set(key string, value io.Reader) error {
|
||||
return s.Cache.Set(s.hash(key), value)
|
||||
}
|
||||
|
||||
func (s ShaWrapper) SetJSON(key string, value any) error {
|
||||
return s.Cache.SetJSON(s.hash(key), value)
|
||||
}
|
||||
|
||||
func (s ShaWrapper) SetString(key string, value string) error {
|
||||
return s.Cache.SetString(s.hash(key), value)
|
||||
}
|
||||
|
||||
func (s ShaWrapper) Delete(key string) error {
|
||||
return s.Cache.Delete(s.hash(key))
|
||||
}
|
||||
|
14
pkg/cache/directory.go
vendored
14
pkg/cache/directory.go
vendored
@ -14,8 +14,7 @@ import (
|
||||
type Directory struct {
|
||||
BaseFolder string
|
||||
MaxLife time.Duration
|
||||
|
||||
lock sync.Mutex
|
||||
lock sync.Mutex
|
||||
}
|
||||
|
||||
var _ Cache = &Directory{}
|
||||
@ -76,7 +75,16 @@ func (d *Directory) AutoCleanupRoutine(ctx context.Context) error {
|
||||
func (d *Directory) openFile(key string) (*os.File, error) {
|
||||
path := d.GetPath(key)
|
||||
|
||||
return os.Open(path)
|
||||
res, err := os.Open(path)
|
||||
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (d *Directory) Set(key string, value io.Reader) error {
|
||||
|
87
pkg/extractor/extractor.go
Normal file
87
pkg/extractor/extractor.go
Normal file
@ -0,0 +1,87 @@
|
||||
package extractor
|
||||
|
||||
import (
|
||||
"answer/pkg/cache"
|
||||
"context"
|
||||
"errors"
|
||||
)
|
||||
|
||||
var ErrFailedToExtract = errors.New("failed to extract")
|
||||
|
||||
// Article holds the content an Extractor produced for a single URL.
type Article struct {
	// URL is the address the article was extracted from.
	URL string
	// Title is the title of the extracted page.
	Title string
	// Body is the extracted text of the page.
	Body string
}
|
||||
|
||||
// Extractor is an interface of systems that can extract the contents of
|
||||
type Extractor interface {
|
||||
Extract(ctx context.Context, url string) (Article, error)
|
||||
}
|
||||
|
||||
type multiExtractor struct {
|
||||
extractors []Extractor
|
||||
}
|
||||
|
||||
var _ Extractor = multiExtractor{}
|
||||
|
||||
// Extract will try to extract the contents of a URL using all the extractors, and return the first successful result.
|
||||
func (m multiExtractor) Extract(ctx context.Context, url string) (Article, error) {
|
||||
var errs []error
|
||||
for _, e := range m.extractors {
|
||||
article, err := e.Extract(ctx, url)
|
||||
if err == nil {
|
||||
return article, nil
|
||||
}
|
||||
|
||||
if errors.Is(err, ErrFailedToExtract) {
|
||||
continue
|
||||
}
|
||||
|
||||
errs = append(errs, err)
|
||||
}
|
||||
|
||||
if len(errs) > 0 {
|
||||
return Article{}, errors.Join(errs...)
|
||||
}
|
||||
return Article{}, ErrFailedToExtract
|
||||
}
|
||||
|
||||
func MultiExtractor(e ...Extractor) Extractor {
|
||||
return multiExtractor{extractors: e}
|
||||
}
|
||||
|
||||
type CacheExtractor struct {
|
||||
Cache cache.Cache
|
||||
Tag string
|
||||
Extractor Extractor
|
||||
}
|
||||
|
||||
var _ Extractor = CacheExtractor{}
|
||||
|
||||
func (c CacheExtractor) Extract(ctx context.Context, url string) (Article, error) {
|
||||
tag := c.Tag
|
||||
if tag == "" {
|
||||
tag = "defaultextractor:"
|
||||
}
|
||||
key := tag + ":" + url
|
||||
|
||||
var article Article
|
||||
|
||||
err := c.Cache.GetJSON(key, &article)
|
||||
if err == nil {
|
||||
return article, nil
|
||||
}
|
||||
|
||||
article, err = c.Extractor.Extract(ctx, url)
|
||||
if err != nil {
|
||||
return Article{}, err
|
||||
}
|
||||
|
||||
err = c.Cache.SetJSON(key, article)
|
||||
if err != nil {
|
||||
return Article{}, err
|
||||
}
|
||||
|
||||
return article, nil
|
||||
}
|
25
pkg/extractor/goose.go
Normal file
25
pkg/extractor/goose.go
Normal file
@ -0,0 +1,25 @@
|
||||
package extractor
|
||||
|
||||
import (
|
||||
"context"
|
||||
goose "github.com/advancedlogic/GoOse"
|
||||
)
|
||||
|
||||
type GooseExtractor struct {
|
||||
}
|
||||
|
||||
func (GooseExtractor) Extract(ctx context.Context, url string) (Article, error) {
|
||||
var res = Article{
|
||||
URL: url,
|
||||
}
|
||||
g := goose.New()
|
||||
|
||||
article, err := g.ExtractFromURL(url)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
|
||||
res.Body = article.CleanedText
|
||||
res.Title = article.Title
|
||||
return res, nil
|
||||
}
|
81
pkg/extractor/playwright.go
Normal file
81
pkg/extractor/playwright.go
Normal file
@ -0,0 +1,81 @@
|
||||
package extractor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/playwright-community/playwright-go"
|
||||
"os"
|
||||
)
|
||||
|
||||
type PlaywrightExtractor struct {
|
||||
}
|
||||
|
||||
var _ Extractor = PlaywrightExtractor{}
|
||||
|
||||
// getReadabilityJS returns the contents of the file "readability.js", resolved
// relative to the current working directory. On a read error it returns an
// empty string together with that error.
func getReadabilityJS() (script string, err error) {
	var raw []byte

	if raw, err = os.ReadFile("readability.js"); err != nil {
		return "", err
	}

	script = string(raw)

	return script, nil
}
|
||||
|
||||
func (p PlaywrightExtractor) Extract(_ context.Context, url string) (Article, error) {
|
||||
var article = Article{
|
||||
URL: url,
|
||||
}
|
||||
pw, err := playwright.Run()
|
||||
if err != nil {
|
||||
return article, err
|
||||
}
|
||||
defer pw.Stop()
|
||||
|
||||
browser, err := pw.Chromium.Launch()
|
||||
if err != nil {
|
||||
return article, err
|
||||
}
|
||||
defer browser.Close()
|
||||
|
||||
page, err := browser.NewPage()
|
||||
if err != nil {
|
||||
return article, err
|
||||
}
|
||||
defer page.Close()
|
||||
|
||||
_, err = page.Goto(url)
|
||||
if err != nil {
|
||||
return article, err
|
||||
}
|
||||
|
||||
// Inject Readability.js
|
||||
readabilityJS, err := getReadabilityJS()
|
||||
if err != nil {
|
||||
return article, err
|
||||
}
|
||||
|
||||
_, err = page.AddScriptTag(playwright.PageAddScriptTagOptions{
|
||||
Content: &readabilityJS,
|
||||
})
|
||||
if err != nil {
|
||||
return article, err
|
||||
}
|
||||
|
||||
// Run Readability and get the article content
|
||||
content, err := page.Evaluate(`() => {
|
||||
let article = new Readability(document).parse();
|
||||
return article ? article.textContent : null;
|
||||
}`)
|
||||
if err != nil {
|
||||
return article, err
|
||||
}
|
||||
|
||||
text, ok := content.(string)
|
||||
if !ok {
|
||||
return article, fmt.Errorf("failed to convert content to string")
|
||||
}
|
||||
|
||||
article.Body = text
|
||||
article.Title, _ = page.Title()
|
||||
|
||||
return article, nil
|
||||
}
|
@ -1,18 +1,30 @@
|
||||
package search
|
||||
|
||||
import (
|
||||
"answer/pkg/cache"
|
||||
"context"
|
||||
googlesearch "github.com/rocketlaunchr/google-search"
|
||||
"sort"
|
||||
)
|
||||
|
||||
type Google struct {
|
||||
Cache cache.Cache
|
||||
}
|
||||
|
||||
var _ Search = Google{}
|
||||
|
||||
func (Google) Search(ctx context.Context, search string) ([]Result, error) {
|
||||
res, err := googlesearch.Search(ctx, search, googlesearch.SearchOptions{
|
||||
func (g Google) Search(ctx context.Context, search string) ([]Result, error) {
|
||||
var res []Result
|
||||
|
||||
key := "google:" + search
|
||||
|
||||
err := g.Cache.GetJSON(key, &res)
|
||||
|
||||
if err == nil {
|
||||
return res, nil
|
||||
}
|
||||
|
||||
results, err := googlesearch.Search(ctx, search, googlesearch.SearchOptions{
|
||||
CountryCode: "",
|
||||
LanguageCode: "",
|
||||
Limit: 0,
|
||||
@ -27,18 +39,20 @@ func (Google) Search(ctx context.Context, search string) ([]Result, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var results []Result
|
||||
|
||||
// just in case, sort the res by rank, as the api does not mention it is sorted
|
||||
sort.Slice(res, func(i, j int) bool {
|
||||
return res[i].Rank < res[j].Rank
|
||||
return results[i].Rank < results[j].Rank
|
||||
})
|
||||
|
||||
for _, r := range res {
|
||||
results = append(results, Result{
|
||||
for _, r := range results {
|
||||
res = append(res, Result{
|
||||
Title: r.Title,
|
||||
URL: r.URL,
|
||||
Description: r.Description,
|
||||
})
|
||||
}
|
||||
|
||||
_ = g.Cache.SetJSON(key, res)
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user