answer/pkg/agents/knowledge_processor.go
Steve Dudenhoeffer 693ac4e6a7 Add core implementation for AI-powered question answering
Introduce multiple agents, tools, and utilities for processing, extracting, and answering user-provided questions using LLMs and external data. Key features include knowledge processing, question splitting, search term generation, and contextual knowledge handling.
2025-03-21 11:10:48 -04:00

187 lines
4.9 KiB
Go

package agents
import (
"context"
"fmt"
"regexp"
"strconv"
"strings"
"gitea.stevedudenhoeffer.com/steve/answer/pkg/agents/shared"
gollm "gitea.stevedudenhoeffer.com/steve/go-llm"
)
// KnowledgeProcessor turns gathered knowledge into a written answer annotated
// with source citations. See Process for the individual steps.
type KnowledgeProcessor struct {
// Model is the chat-completion backend that Process calls, once to draft
// the answer and once to tag that draft with citations.
Model gollm.ChatCompletion
// ContextualInformation holds optional extra context. When non-empty, the
// entries are joined with newlines and sent as an additional system
// message on the answer-drafting request.
ContextualInformation []string
}
// knowledgeCitationPattern matches a bracketed citation group such as "[1]"
// or "[1, 2]". The first submatch is the comma-separated list inside the
// brackets. It is compiled once at package scope rather than on every call.
var knowledgeCitationPattern = regexp.MustCompile(`\[([\d,\s]+)]`)

// Process takes a knowledge object and processes it into a response string.
//
// It performs two model calls:
//
//  1. The original questions, all gathered knowledge, and any remaining
//     unanswered questions are sent to the model, which drafts an answer.
//  2. The draft is sent back together with the knowledge grouped under
//     numbered sources, and the model is asked to tag facts with citations
//     such as "[1,2]".
//
// Citation numbers found in the second response are then resolved back to
// their sources and appended to the response as a numbered source list. If no
// valid citation is found, the cited answer is returned as-is.
//
// Sources are numbered in the order they first appear in knowledge.Knowledge,
// so the numbering is stable between runs.
//
// An error is returned if either model call fails or returns no choices.
func (a KnowledgeProcessor) Process(ctx context.Context, knowledge shared.Knowledge) (string, error) {
	// Group the gained knowledge by source. A slice plus an index map is used
	// instead of ranging over a map so that source numbers are deterministic
	// (Go randomizes map iteration order).
	type source struct {
		source string
		info   []string
	}
	var sources []source
	position := map[string]int{}
	for _, k := range knowledge.Knowledge {
		idx, seen := position[k.Source]
		if !seen {
			idx = len(sources)
			position[k.Source] = idx
			sources = append(sources, source{source: k.Source})
		}
		sources[idx].info = append(sources[idx].info, k.Info)
	}

	// Flatten every piece of information into one newline-separated list for
	// the answer-drafting prompt.
	var infoGained strings.Builder
	for _, s := range sources {
		if infoGained.Len() > 0 {
			infoGained.WriteByte('\n')
		}
		infoGained.WriteString(strings.Join(s.info, "\n"))
	}
	sourceCount := strconv.Itoa(len(sources))

	systemPrompt := `I am trying to answer a question, and I gathered some knowledge in an attempt to do so. Here is what I am trying to answer:
` + strings.Join(knowledge.OriginalQuestions, "\n") + `
Here is the knowledge I have gathered from ` + sourceCount + ` sources:
` + infoGained.String()
	if len(knowledge.RemainingQuestions) > 0 {
		systemPrompt += "\n\nI still have some questions that I could not find an answer to:\n" + strings.Join(knowledge.RemainingQuestions, "\n")
	}
	systemPrompt += "\n\nUsing the sources, write an answer to the original question. Note any information that wasn't able to be answered."

	req := gollm.Request{
		Messages: []gollm.Message{
			{
				Role: gollm.RoleSystem,
				Text: systemPrompt,
			},
		},
	}
	if len(a.ContextualInformation) > 0 {
		req.Messages = append(req.Messages, gollm.Message{
			Role: gollm.RoleSystem,
			Text: "Some contextual information you should be aware of: " + strings.Join(a.ContextualInformation, "\n"),
		})
	}

	resp, err := a.Model.ChatComplete(ctx, req)
	if err != nil {
		return "", fmt.Errorf("failed to chat complete: %w", err)
	}
	// Guard against an empty response instead of panicking on Choices[0].
	if len(resp.Choices) == 0 {
		return "", fmt.Errorf("failed to chat complete: model returned no choices for the answer")
	}
	draft := resp.Choices[0].Content

	systemPrompt = `I am trying to source an analysis of information I have gathered.
To do this I will provide you with all of the sourced information I have gathered in the format of:
[Source]
- Information
- Information
- Information
Where Source will be a number from 1 to ` + sourceCount + ` and Information will be the information gathered from that source.
You should then read the information provided by the user and tag the information with citations from the sources provided. If a fact is provided by multiple sources, you should tag it with all of the sources that provide that information.
For instance, if the sourced data were:
[1]
- The diameter of the moon is 3,474.8 km
- The moon's age is 4.53 billion years
[2]
- The moon's age is 4.53 billion years
[3]
- The moon is on average 238,855 miles away from the Earth
And the user provided the following information:
The moon is 4.5 billion years old, 238,855 miles away from the Earth, and has a diameter of 3,474.8 km.
You would then tag the information with the sources like so:
The moon is 4.5 billion years old [1,2], 238,855 miles away from the Earth [3], and has a diameter of 3,474.8 km [1].`

	// List the knowledge under 1-based source numbers; these are the numbers
	// the model is expected to cite.
	var providedIntel strings.Builder
	providedIntel.WriteString("Here is the information I have gathered:\n")
	for i, s := range sources {
		providedIntel.WriteString("[" + strconv.Itoa(i+1) + "]\n")
		for _, info := range s.info {
			providedIntel.WriteString(" - " + info + "\n")
		}
	}

	summarizedData := "Here is the text I need you to source with citations:\n" + draft

	req = gollm.Request{
		Messages: []gollm.Message{
			{
				Role: gollm.RoleSystem,
				Text: systemPrompt,
			},
			{
				Role: gollm.RoleSystem,
				Text: providedIntel.String(),
			},
			{
				Role: gollm.RoleUser,
				Text: summarizedData,
			},
		},
	}

	resp, err = a.Model.ChatComplete(ctx, req)
	if err != nil {
		return "", fmt.Errorf("failed to chat complete: %w", err)
	}
	if len(resp.Choices) == 0 {
		return "", fmt.Errorf("failed to chat complete: model returned no choices for the citations")
	}
	res := resp.Choices[0].Content

	// Collect every valid source number the model cited. cited is indexed by
	// the 1-based source number, so index 0 is unused.
	cited := make([]bool, len(sources)+1)
	anyCited := false
	for _, match := range knowledgeCitationPattern.FindAllStringSubmatch(res, -1) {
		for _, field := range strings.Split(match[1], ",") {
			n, convErr := strconv.Atoi(strings.TrimSpace(field))
			// Skip empty fields, unparsable numbers, and numbers that do not
			// correspond to a known source; the model's output is best-effort.
			if convErr != nil || n < 1 || n > len(sources) {
				continue
			}
			cited[n] = true
			anyCited = true
		}
	}
	if !anyCited {
		return res, nil
	}

	// Append the list of cited sources in ascending source-number order.
	var out strings.Builder
	out.WriteString(res)
	out.WriteString("\n\nHere are the sources for the information provided:\n")
	for i, s := range sources {
		if !cited[i+1] {
			continue
		}
		out.WriteString("[" + strconv.Itoa(i+1) + "] <" + s.source + ">\n")
	}
	return out.String(), nil
}