From 6c30fdf4d8d4f19e99465b93f3164b31dc0747ac Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Fri, 21 Feb 2025 18:40:25 -0500 Subject: [PATCH] Add DuckDuckGo support and refactor caching system Introduced DuckDuckGo as a new search provider alongside Google. Implemented a flexible caching system with in-memory, file-based, and no-op cache options to improve modularity. Updated dependencies and revised the project structure for improved maintainability. --- cmd/answer.go | 19 +++++-- go.mod | 104 ++++++++++++++++++++------------------- pkg/cache/memory.go | 78 +++++++++++++++++++++++++++++ pkg/cache/nop.go | 38 ++++++++++++++ pkg/search/duckduckgo.go | 83 +++++++++++++++++++++++++++++++ 5 files changed, 266 insertions(+), 56 deletions(-) create mode 100644 pkg/cache/memory.go create mode 100644 pkg/cache/nop.go create mode 100644 pkg/search/duckduckgo.go diff --git a/cmd/answer.go b/cmd/answer.go index 3db35d9..cdd484f 100644 --- a/cmd/answer.go +++ b/cmd/answer.go @@ -5,13 +5,15 @@ import ( "answer/pkg/cache" "answer/pkg/search" "context" - gollm "gitea.stevedudenhoeffer.com/steve/go-llm" - "github.com/joho/godotenv" - "github.com/urfave/cli" "log/slog" "os" "strings" "time" + + "github.com/joho/godotenv" + "github.com/urfave/cli" + + gollm "gitea.stevedudenhoeffer.com/steve/go-llm" ) func getKey(key string, env string) string { @@ -57,13 +59,13 @@ func main() { &cli.StringFlag{ Name: "search-provider", - Value: "google", + Value: "duckduckgo", Usage: "search provider to use for searching the web", }, &cli.StringFlag{ Name: "cache-provider", - Value: "memory", + Value: "file", Usage: "cache provider to use for caching search results", }, }, @@ -111,6 +113,13 @@ func main() { case "google": question.Search = search.Google{Cache: question.Cache} + case "duckduckgo": + var err error + question.Search, err = search.NewDuckDuckGo(question.Cache) + if err != nil { + panic("failed to create duckduckgo search: " + err.Error()) + } + default: panic("unknown search 
provider") } diff --git a/go.mod b/go.mod index c714d5d..6d031f0 100644 --- a/go.mod +++ b/go.mod @@ -2,81 +2,83 @@ module answer go 1.23.2 -replace gitea.stevedudenhoeffer.com/steve/go-llm => ../go-llm - replace github.com/rocketlaunchr/google-search => github.com/chrisjoyce911/google-search v0.0.0-20230910003754-e501aedf805a require ( - gitea.stevedudenhoeffer.com/steve/go-llm v0.0.0-20241031152103-f603010dee49 + gitea.stevedudenhoeffer.com/steve/go-llm v0.0.0-20250123045620-0d909edd44d9 github.com/advancedlogic/GoOse v0.0.0-20231203033844-ae6b36caf275 github.com/joho/godotenv v1.5.1 - github.com/playwright-community/playwright-go v0.4702.0 + github.com/playwright-community/playwright-go v0.5001.0 github.com/rocketlaunchr/google-search v1.1.6 github.com/urfave/cli v1.22.16 ) require ( - cloud.google.com/go v0.115.0 // indirect - cloud.google.com/go/ai v0.8.0 // indirect - cloud.google.com/go/auth v0.6.0 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect - cloud.google.com/go/compute/metadata v0.3.0 // indirect - cloud.google.com/go/longrunning v0.5.7 // indirect - github.com/PuerkitoBio/goquery v1.8.1 // indirect - github.com/andybalholm/cascadia v1.3.2 // indirect - github.com/antchfx/htmlquery v1.3.0 // indirect - github.com/antchfx/xmlquery v1.3.15 // indirect - github.com/antchfx/xpath v1.2.4 // indirect - github.com/araddon/dateparse v0.0.0-20180729174819-cfd92a431d0e // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect - github.com/deckarep/golang-set/v2 v2.6.0 // indirect + cloud.google.com/go v0.118.3 // indirect + cloud.google.com/go/ai v0.10.0 // indirect + cloud.google.com/go/auth v0.15.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect + cloud.google.com/go/compute/metadata v0.6.0 // indirect + cloud.google.com/go/longrunning v0.6.4 // indirect + gitea.stevedudenhoeffer.com/steve/go-extractor v0.0.0-20250123020607-964a98a5a884 // indirect + github.com/PuerkitoBio/goquery v1.10.2 // indirect + 
github.com/andybalholm/cascadia v1.3.3 // indirect + github.com/antchfx/htmlquery v1.3.4 // indirect + github.com/antchfx/xmlquery v1.4.4 // indirect + github.com/antchfx/xpath v1.3.3 // indirect + github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect + github.com/deckarep/golang-set/v2 v2.7.0 // indirect github.com/fatih/set v0.2.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/gigawattio/window v0.0.0-20180317192513-0f5467e35573 // indirect github.com/go-jose/go-jose/v3 v3.0.3 // indirect - github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect - github.com/go-resty/resty/v2 v2.0.0 // indirect + github.com/go-resty/resty/v2 v2.16.5 // indirect + github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect + github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f // indirect github.com/go-stack/stack v1.8.1 // indirect github.com/gobwas/glob v0.2.3 // indirect github.com/gocolly/colly/v2 v2.1.0 // indirect - github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect + github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect github.com/golang/protobuf v1.5.4 // indirect - github.com/google/generative-ai-go v0.18.0 // indirect - github.com/google/s2a-go v0.1.7 // indirect + github.com/google/generative-ai-go v0.19.0 // indirect + github.com/google/s2a-go v0.1.9 // indirect github.com/google/uuid v1.6.0 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect - github.com/googleapis/gax-go/v2 v2.12.5 // indirect - github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect + github.com/googleapis/gax-go/v2 v2.14.1 // indirect + 
github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 // indirect github.com/kennygrant/sanitize v1.2.4 // indirect - github.com/liushuangls/go-anthropic/v2 v2.8.0 // indirect - github.com/mattn/go-runewidth v0.0.3 // indirect - github.com/olekukonko/tablewriter v0.0.0-20180506121414-d4647c9c7a84 // indirect - github.com/pkg/errors v0.8.1 // indirect + github.com/liushuangls/go-anthropic/v2 v2.13.1 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect + github.com/olekukonko/tablewriter v0.0.5 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/rivo/uniseg v0.4.7 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect - github.com/sashabaranov/go-openai v1.31.0 // indirect + github.com/sashabaranov/go-openai v1.37.0 // indirect github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect github.com/temoto/robotstxt v1.1.2 // indirect - go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 // indirect - go.opentelemetry.io/otel v1.26.0 // indirect - go.opentelemetry.io/otel/metric v1.26.0 // indirect - go.opentelemetry.io/otel/trace v1.26.0 // indirect - go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.24.0 // indirect - golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect - golang.org/x/net v0.26.0 // indirect - golang.org/x/oauth2 v0.21.0 // indirect - golang.org/x/sync v0.9.0 // indirect - golang.org/x/sys v0.21.0 // indirect - golang.org/x/text v0.16.0 // indirect - golang.org/x/time v0.5.0 // indirect - google.golang.org/api v0.186.0 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp 
v0.59.0 // indirect
+	go.opentelemetry.io/otel v1.34.0 // indirect
+	go.opentelemetry.io/otel/metric v1.34.0 // indirect
+	go.opentelemetry.io/otel/trace v1.34.0 // indirect
+	golang.org/x/crypto v0.33.0 // indirect
+	golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
+	golang.org/x/net v0.35.0 // indirect
+	golang.org/x/oauth2 v0.26.0 // indirect
+	golang.org/x/sync v0.11.0 // indirect
+	golang.org/x/sys v0.30.0 // indirect
+	golang.org/x/text v0.22.0 // indirect
+	golang.org/x/time v0.10.0 // indirect
+	google.golang.org/api v0.222.0 // indirect
 	google.golang.org/appengine v1.6.8 // indirect
-	google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 // indirect
-	google.golang.org/genproto/googleapis/rpc v0.0.0-20240617180043-68d350f18fd4 // indirect
-	google.golang.org/grpc v1.64.1 // indirect
-	google.golang.org/protobuf v1.34.2 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20250219182151-9fdb1cabc7b2 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2 // indirect
+	google.golang.org/grpc v1.70.0 // indirect
+	google.golang.org/protobuf v1.36.5 // indirect
 )
diff --git a/pkg/cache/memory.go b/pkg/cache/memory.go
new file mode 100644
index 0000000..1ed2e97
--- /dev/null
+++ b/pkg/cache/memory.go
@@ -0,0 +1,99 @@
+package cache
+
+import (
+	"encoding/json"
+	"io"
+)
+
+// memoryCache is a Cache that keeps every entry as raw bytes in a map held in
+// process memory.
+//
+// NOTE(review): the map is read and written without locking; confirm that a
+// single instance is never used from more than one goroutine at a time.
+type memoryCache struct {
+	data map[string][]byte
+}
+
+var _ Cache = &memoryCache{}
+
+// NewMemoryCache returns an empty in-memory Cache. The returned error is
+// always nil.
+func NewMemoryCache() (Cache, error) {
+	return &memoryCache{
+		data: make(map[string][]byte),
+	}, nil
+}
+
+// Get writes the bytes stored under key to writer. It returns ErrNotFound when
+// key is absent, otherwise whatever error writer.Write reports.
+func (m *memoryCache) Get(key string, writer io.Writer) error {
+	data, ok := m.data[key]
+	if ok {
+		_, err := writer.Write(data)
+		return err
+	}
+
+	return ErrNotFound
+}
+
+// GetString returns the bytes stored under key as a string, or ErrNotFound
+// when key is absent.
+func (m *memoryCache) GetString(key string) (string, error) {
+	data, ok := m.data[key]
+	if ok {
+		return string(data), nil
+	}
+
+	return "", ErrNotFound
+}
+
+// GetJSON unmarshals the JSON stored under key into value. It returns
+// ErrNotFound when key is absent, otherwise the result of json.Unmarshal.
+func (m *memoryCache) GetJSON(key string, value interface{}) error {
+	data, ok := m.data[key]
+	if ok {
+		return json.Unmarshal(data, value)
+	}
+
+	return ErrNotFound
+}
+
+// Set reads value to EOF and stores the bytes under key, replacing any
+// existing entry. A read error is returned and leaves the cache unchanged.
+func (m *memoryCache) Set(key string, value io.Reader) error {
+	data, err := io.ReadAll(value)
+	if err != nil {
+		return err
+	}
+
+	m.data[key] = data
+
+	return nil
+}
+
+// SetJSON stores the JSON encoding of value under key, replacing any existing
+// entry. A marshalling error is returned and leaves the cache unchanged.
+func (m *memoryCache) SetJSON(key string, value interface{}) error {
+	data, err := json.Marshal(value)
+	if err != nil {
+		return err
+	}
+
+	m.data[key] = data
+
+	return nil
+}
+
+// SetString stores value under key, replacing any existing entry. It always
+// returns nil.
+func (m *memoryCache) SetString(key string, value string) error {
+	m.data[key] = []byte(value)
+	return nil
+}
+
+// Delete removes key from the cache; deleting an absent key is a no-op. It
+// always returns nil.
+func (m *memoryCache) Delete(key string) error {
+	delete(m.data, key)
+	return nil
+}
diff --git a/pkg/cache/nop.go b/pkg/cache/nop.go
new file mode 100644
index 0000000..be54764
--- /dev/null
+++ b/pkg/cache/nop.go
@@ -0,0 +1,47 @@
+package cache
+
+import (
+	"io"
+)
+
+// Nop is a Cache that stores nothing: every lookup reports ErrNotFound and
+// every write or delete returns nil without doing anything.
+type Nop struct {
+}
+
+var _ Cache = Nop{}
+
+// Get always returns ErrNotFound and writes nothing.
+func (Nop) Get(_ string, _ io.Writer) error {
+	return ErrNotFound
+}
+
+// GetString always returns an empty string and ErrNotFound.
+func (Nop) GetString(_ string) (string, error) {
+	return "", ErrNotFound
+}
+
+// GetJSON always returns ErrNotFound and leaves its argument untouched.
+func (Nop) GetJSON(_ string, _ interface{}) error {
+	return ErrNotFound
+}
+
+// Set discards the value without reading it and returns nil.
+func (Nop) Set(_ string, _ io.Reader) error {
+	return nil
+}
+
+// SetJSON discards the value and returns nil.
+func (Nop) SetJSON(_ string, _ interface{}) error {
+	return nil
+}
+
+// SetString discards the value and returns nil.
+func (Nop) SetString(_ string, _ string) error {
+	return nil
+}
+
+// Delete does nothing and returns nil.
+func (Nop) Delete(_ string) error {
+	return nil
+}
diff --git a/pkg/search/duckduckgo.go b/pkg/search/duckduckgo.go
new file mode 100644
index 0000000..7a331ce
--- /dev/null
+++ b/pkg/search/duckduckgo.go
@@ -0,0 +1,92 @@
+package search
+
+import (
+	"answer/pkg/cache"
+	"context"
+	"fmt"
+	"time"
+
+	"gitea.stevedudenhoeffer.com/steve/go-extractor"
+
+	"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/duckduckgo"
+)
+
+// duckDuckGo is a Search implementation that runs queries through the
+// go-extractor DuckDuckGo helper and stores result lists in Cache as JSON
+// under the key "duckduckgo:" + query.
+type duckDuckGo struct {
+	Cache   cache.Cache
+	Browser extractor.Browser
+}
+
+// NewDuckDuckGo returns a Search backed by DuckDuckGo that caches results in
+// c; a nil c disables caching. It starts a Playwright browser with a
+// 60-second timeout and returns a wrapped error if that fails.
+//
+// NOTE(review): nothing visible here closes the browser; confirm whether the
+// caller or go-extractor is expected to release it.
+func NewDuckDuckGo(c cache.Cache) (Search, error) {
+	// Fall back to the no-op cache so Search never calls through a nil interface.
+	if c == nil {
+		c = cache.Nop{}
+	}
+
+	timeout := 60 * time.Second
+	browser, err := extractor.NewPlayWrightBrowser(extractor.PlayWrightBrowserOptions{Timeout: &timeout})
+	if err != nil {
+		return nil, fmt.Errorf("failed to create browser: %w", err)
+	}
+
+	return duckDuckGo{
+		Cache:   c,
+		Browser: browser,
+	}, nil
+}
+
+var _ Search = duckDuckGo{}
+
+// Search returns the DuckDuckGo results for search. A cached entry is
+// returned when the cache can supply one; any cache read error, not only a
+// miss, falls through to a live search whose results are then cached.
+func (d duckDuckGo) Search(ctx context.Context, search string) ([]Result, error) {
+	key := "duckduckgo:" + search
+
+	// Decode into a dedicated slice: a failed decode can leave it partially
+	// filled, and those entries must not be mixed into the fresh results.
+	var cached []Result
+	if err := d.Cache.GetJSON(key, &cached); err == nil {
+		return cached, nil
+	}
+
+	results, err := d.searchDuckDuckGo(ctx, search)
+	if err != nil {
+		return nil, err
+	}
+
+	// Caching is best-effort: a failed write must not fail a successful search.
+	_ = d.Cache.SetJSON(key, results)
+
+	return results, nil
+}
+
+// searchDuckDuckGo runs a live query through d.Browser using
+// duckduckgo.DefaultConfig and converts each hit into a Result.
+func (d duckDuckGo) searchDuckDuckGo(ctx context.Context, search string) ([]Result, error) {
+	cfg := duckduckgo.DefaultConfig
+
+	hits, err := cfg.Search(ctx, d.Browser, search)
+	if err != nil {
+		return nil, fmt.Errorf("duckduckgo search %q: %w", search, err)
+	}
+
+	res := make([]Result, len(hits))
+	for i, hit := range hits {
+		res[i] = Result{
+			URL:         hit.URL,
+			Title:       hit.Title,
+			Description: hit.Description,
+		}
+	}
+
+	return res, nil
+}