Merge pull request 'feature: add recipe extractor with JSON-LD and DOM parsing' (#48) from feature/allrecipes-extractor into main
This commit was merged in pull request #48.
This commit is contained in:
374
sites/recipe/recipe.go
Normal file
374
sites/recipe/recipe.go
Normal file
@@ -0,0 +1,374 @@
|
||||
package recipe
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||
)
|
||||
|
||||
// Recipe is the structured result of extracting a recipe from a web page.
// Fields that could not be found on the page are left at their zero value.
type Recipe struct {
	// Name is the recipe title.
	Name string
	// Description is a short summary of the recipe.
	Description string
	// Author is the name of the recipe's author.
	Author string
	// PrepTime is the preparation time as display text (for example "15 min").
	PrepTime string
	// CookTime is the cooking time as display text.
	CookTime string
	// TotalTime is the overall time as display text.
	TotalTime string
	// Yield is the number of servings or amount produced.
	Yield string
	// Ingredients lists the ingredient lines in page order.
	Ingredients []string
	// Instructions lists the preparation steps in page order.
	Instructions []string
	// ImageURL points at an image of the dish.
	ImageURL string
	// Rating is the aggregate rating value; 0 when none was found.
	Rating float64
	// Calories is the calorie information as display text.
	Calories string
	// SourceURL is the URL the recipe was extracted from.
	SourceURL string
}
|
||||
|
||||
// Config carries the settings of the recipe extractor. It currently has no
// fields, so the zero value is ready to use.
type Config struct{}

// DefaultConfig is the configuration used by the package-level helpers.
var DefaultConfig = Config{}

// validate returns the configuration to run with. There is nothing to
// normalise yet, so the receiver is handed back unchanged.
func (c Config) validate() Config { return c }
|
||||
|
||||
// ExtractRecipe extracts structured recipe data from any URL.
|
||||
// Uses JSON-LD structured data when available, falls back to DOM parsing.
|
||||
func (c Config) ExtractRecipe(ctx context.Context, b extractor.Browser, url string) (*Recipe, error) {
|
||||
c = c.validate()
|
||||
|
||||
slog.Info("fetching recipe", "url", url)
|
||||
doc, err := b.Open(ctx, url, extractor.OpenPageOptions{})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open recipe page: %w", err)
|
||||
}
|
||||
defer extractor.DeferClose(doc)
|
||||
|
||||
timeout := 10 * time.Second
|
||||
if err := doc.WaitForNetworkIdle(&timeout); err != nil {
|
||||
slog.Warn("WaitForNetworkIdle failed", "err", err)
|
||||
}
|
||||
|
||||
r, err := extractRecipeFromJSONLD(doc)
|
||||
if err == nil && r.Name != "" {
|
||||
r.SourceURL = url
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// Fall back to DOM parsing
|
||||
r = extractRecipeFromDOM(doc)
|
||||
r.SourceURL = url
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// ExtractRecipe is a convenience function using DefaultConfig.
|
||||
func ExtractRecipe(ctx context.Context, b extractor.Browser, url string) (*Recipe, error) {
|
||||
return DefaultConfig.ExtractRecipe(ctx, b, url)
|
||||
}
|
||||
|
||||
// jsonLDGraph is a JSON-LD document whose nodes are wrapped in an "@graph"
// array. Each node is kept raw so it can be decoded individually.
type jsonLDGraph struct {
	// Graph holds the undecoded nodes of the "@graph" array.
	Graph []json.RawMessage `json:"@graph"`
}
|
||||
|
||||
// jsonLDRecipe mirrors the subset of the JSON-LD Recipe node that this
// package reads. Properties that appear in more than one JSON shape are
// decoded as any and normalised later.
type jsonLDRecipe struct {
	// Type is the "@type" value: a string or an array of strings.
	Type        any    `json:"@type"`
	Name        string `json:"name"`
	Description string `json:"description"`
	// Author is a string, an object with a "name", or an array of either.
	Author    any    `json:"author"`
	PrepTime  string `json:"prepTime"`
	CookTime  string `json:"cookTime"`
	TotalTime string `json:"totalTime"`
	// Yield is a string, a number, or an array.
	Yield       any      `json:"recipeYield"`
	Ingredients []string `json:"recipeIngredient"`
	// Instructions is a string, an array of strings, or an array of objects.
	Instructions any `json:"recipeInstructions"`
	// Image is a string, an object with a "url", or an array of either.
	Image any `json:"image"`
	// Nutrition is nil when the node has no "nutrition" property.
	Nutrition *struct {
		Calories string `json:"calories"`
	} `json:"nutrition"`
	// Rating is nil when the node has no "aggregateRating" property.
	Rating *struct {
		// RatingValue is a number or a numeric string.
		RatingValue any `json:"ratingValue"`
	} `json:"aggregateRating"`
}
|
||||
|
||||
func extractRecipeFromJSONLD(doc extractor.Node) (*Recipe, error) {
|
||||
var recipe Recipe
|
||||
|
||||
scripts := doc.Select("script[type='application/ld+json']")
|
||||
if len(scripts) == 0 {
|
||||
return nil, fmt.Errorf("no JSON-LD scripts found")
|
||||
}
|
||||
|
||||
for _, script := range scripts {
|
||||
txt, err := script.Text()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
txt = strings.TrimSpace(txt)
|
||||
|
||||
r, err := parseJSONLDRecipe(txt)
|
||||
if err == nil && r.Name != "" {
|
||||
return r, nil
|
||||
}
|
||||
}
|
||||
|
||||
return &recipe, fmt.Errorf("no Recipe JSON-LD found")
|
||||
}
|
||||
|
||||
func parseJSONLDRecipe(raw string) (*Recipe, error) {
|
||||
// Try direct Recipe object
|
||||
var jr jsonLDRecipe
|
||||
if err := json.Unmarshal([]byte(raw), &jr); err == nil {
|
||||
if isRecipeType(jr.Type) {
|
||||
return convertJSONLDRecipe(&jr), nil
|
||||
}
|
||||
}
|
||||
|
||||
// Try @graph array
|
||||
var graph jsonLDGraph
|
||||
if err := json.Unmarshal([]byte(raw), &graph); err == nil && len(graph.Graph) > 0 {
|
||||
for _, item := range graph.Graph {
|
||||
var jr jsonLDRecipe
|
||||
if err := json.Unmarshal(item, &jr); err == nil && isRecipeType(jr.Type) {
|
||||
return convertJSONLDRecipe(&jr), nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try array of objects
|
||||
var arr []json.RawMessage
|
||||
if err := json.Unmarshal([]byte(raw), &arr); err == nil {
|
||||
for _, item := range arr {
|
||||
var jr jsonLDRecipe
|
||||
if err := json.Unmarshal(item, &jr); err == nil && isRecipeType(jr.Type) {
|
||||
return convertJSONLDRecipe(&jr), nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("no Recipe type in JSON-LD")
|
||||
}
|
||||
|
||||
// isRecipeType reports whether a decoded JSON-LD "@type" value names the
// Recipe type. The value may be the string "Recipe" or an array containing
// that string; anything else is not a recipe.
func isRecipeType(t interface{}) bool {
	const want = "Recipe"

	if single, ok := t.(string); ok {
		return single == want
	}

	list, ok := t.([]interface{})
	if !ok {
		return false
	}
	for i := range list {
		if name, isString := list[i].(string); isString && name == want {
			return true
		}
	}
	return false
}
|
||||
|
||||
func convertJSONLDRecipe(jr *jsonLDRecipe) *Recipe {
|
||||
r := &Recipe{
|
||||
Name: jr.Name,
|
||||
Description: jr.Description,
|
||||
PrepTime: formatDuration(jr.PrepTime),
|
||||
CookTime: formatDuration(jr.CookTime),
|
||||
TotalTime: formatDuration(jr.TotalTime),
|
||||
Ingredients: jr.Ingredients,
|
||||
}
|
||||
|
||||
// Author can be string or object
|
||||
r.Author = extractAuthor(jr.Author)
|
||||
|
||||
// Yield can be string or array
|
||||
r.Yield = extractYield(jr.Yield)
|
||||
|
||||
// Instructions can be string, array of strings, or array of HowToStep objects
|
||||
r.Instructions = extractInstructions(jr.Instructions)
|
||||
|
||||
// Image can be string or object or array
|
||||
r.ImageURL = extractImage(jr.Image)
|
||||
|
||||
// Nutrition
|
||||
if jr.Nutrition != nil {
|
||||
r.Calories = jr.Nutrition.Calories
|
||||
}
|
||||
|
||||
// Rating
|
||||
if jr.Rating != nil {
|
||||
r.Rating = extractFloat(jr.Rating.RatingValue)
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// extractAuthor returns the author name from a decoded JSON-LD "author"
// value. A string is returned as is, an object yields its "name" string,
// and an array yields the author of its first element. Every other shape
// gives "".
func extractAuthor(v interface{}) string {
	if name, ok := v.(string); ok {
		return name
	}
	if person, ok := v.(map[string]interface{}); ok {
		// A missing or non-string "name" leaves name empty.
		name, _ := person["name"].(string)
		return name
	}
	if list, ok := v.([]interface{}); ok && len(list) > 0 {
		return extractAuthor(list[0])
	}
	return ""
}
|
||||
|
||||
// extractYield returns the yield text from a decoded JSON-LD "recipeYield"
// value.
//
// A string is returned as is and a number is formatted without decimals.
// An array yields the value of its first element, whatever supported shape
// that element has, so ["4 servings"] and [4] both work. Any other shape,
// and an empty array, gives "".
func extractYield(v interface{}) string {
	switch y := v.(type) {
	case string:
		return y
	case float64:
		return fmt.Sprintf("%.0f", y)
	case []interface{}:
		if len(y) > 0 {
			// Recurse so numeric first elements are handled like bare numbers.
			return extractYield(y[0])
		}
	}
	return ""
}
|
||||
|
||||
// extractInstructions flattens a decoded JSON-LD "recipeInstructions" value
// into an ordered list of step texts.
//
// Supported shapes:
//   - a string: one step;
//   - an object with a string "text" (a HowToStep): one step;
//   - an object without "text" but with "itemListElement" (a HowToSection):
//     the steps of that nested list;
//   - an array of any of the above, flattened in order.
//
// Unsupported values contribute nothing; the result is nil when no step is
// found.
func extractInstructions(v interface{}) []string {
	switch inst := v.(type) {
	case string:
		return []string{inst}
	case map[string]interface{}:
		if text, ok := inst["text"].(string); ok {
			return []string{text}
		}
		// Sections group their steps under itemListElement; a missing key
		// yields nil here.
		return extractInstructions(inst["itemListElement"])
	case []interface{}:
		var steps []string
		for _, item := range inst {
			steps = append(steps, extractInstructions(item)...)
		}
		return steps
	}
	return nil
}
|
||||
|
||||
// extractImage returns the image URL from a decoded JSON-LD "image" value.
// A string is the URL itself, an object yields its "url" string, and an
// array is resolved through its first element. Every other shape gives "".
func extractImage(v interface{}) string {
	// Unwrap nested arrays iteratively until a terminal shape is reached.
	for {
		switch img := v.(type) {
		case string:
			return img
		case map[string]interface{}:
			// A missing or non-string "url" leaves link empty.
			link, _ := img["url"].(string)
			return link
		case []interface{}:
			if len(img) == 0 {
				return ""
			}
			v = img[0]
		default:
			return ""
		}
	}
}
|
||||
|
||||
// extractFloat converts a decoded JSON value to a float64. Numbers are
// returned unchanged and strings are scanned for a leading number with
// fmt.Sscanf. Every other type gives 0.
func extractFloat(v interface{}) float64 {
	if num, ok := v.(float64); ok {
		return num
	}

	text, ok := v.(string)
	if !ok {
		return 0
	}

	var parsed float64
	// Best effort: the scan error is ignored on purpose.
	_, _ = fmt.Sscanf(text, "%f", &parsed)
	return parsed
}
|
||||
|
||||
// formatDuration converts an ISO 8601 duration such as "PT1H30M" or
// "P1DT2H" into display text such as "1 hr 30 min" or "1 day 2 hr".
//
// Components are emitted in input order with the units yr, mo, wk, day, hr,
// min and sec. "M" means months before the "T" separator and minutes after
// it. Decimal values keep their fraction ("PT1.5H" gives "1.5 hr").
// Components whose value is zero are dropped, unless every component is
// zero, in which case all are kept ("PT0M" gives "0 min").
//
// An empty input, or one that is only the "PT" prefix, gives "". Input from
// which no component can be read is returned with a leading "PT" or "pt"
// removed.
func formatDuration(iso string) string {
	if iso == "" {
		return ""
	}

	trimmed := strings.TrimPrefix(iso, "PT")
	trimmed = strings.TrimPrefix(trimmed, "pt")
	if trimmed == "" {
		return ""
	}

	// A remaining "P" followed by a digit opens the date section, where "M"
	// means months. Requiring the digit keeps free-form text that merely
	// starts with the letter P on the time interpretation.
	inDate := len(trimmed) == len(iso) && len(trimmed) > 1 &&
		(trimmed[0] == 'P' || trimmed[0] == 'p') &&
		trimmed[1] >= '0' && trimmed[1] <= '9'

	var all, nonZero []string
	var num strings.Builder

	// flush records the pending number with the given unit, if any.
	flush := func(unit string) {
		if num.Len() == 0 {
			return
		}
		value := num.String()
		num.Reset()

		part := value + " " + unit
		all = append(all, part)
		// A value made only of zeros and dots is zero.
		if strings.Trim(value, "0.") != "" {
			nonZero = append(nonZero, part)
		}
	}

	for _, c := range trimmed {
		switch {
		case c >= '0' && c <= '9':
			num.WriteRune(c)
		case c == '.' && num.Len() > 0:
			num.WriteRune(c)
		case c == 'T' || c == 't':
			// Date/time separator: digits never carry across it.
			inDate = false
			num.Reset()
		case c == 'Y' || c == 'y':
			flush("yr")
		case c == 'W' || c == 'w':
			flush("wk")
		case c == 'D' || c == 'd':
			flush("day")
		case c == 'H' || c == 'h':
			flush("hr")
		case c == 'M' || c == 'm':
			if inDate {
				flush("mo")
			} else {
				flush("min")
			}
		case c == 'S' || c == 's':
			flush("sec")
		}
	}

	switch {
	case len(nonZero) > 0:
		return strings.Join(nonZero, " ")
	case len(all) > 0:
		return strings.Join(all, " ")
	}
	return trimmed
}
|
||||
|
||||
func extractRecipeFromDOM(doc extractor.Node) *Recipe {
|
||||
var r Recipe
|
||||
|
||||
// Name — typically in h1 or h2
|
||||
names := doc.Select("h1.recipe-title")
|
||||
if len(names) == 0 {
|
||||
names = doc.Select("h1")
|
||||
}
|
||||
if len(names) > 0 {
|
||||
r.Name, _ = names[0].Text()
|
||||
r.Name = strings.TrimSpace(r.Name)
|
||||
}
|
||||
|
||||
// Description
|
||||
descs := doc.Select("div.recipe-summary p")
|
||||
if len(descs) > 0 {
|
||||
r.Description, _ = descs[0].Text()
|
||||
}
|
||||
|
||||
// Ingredients
|
||||
_ = doc.ForEach("li.ingredient", func(n extractor.Node) error {
|
||||
txt, _ := n.Text()
|
||||
txt = strings.TrimSpace(txt)
|
||||
if txt != "" {
|
||||
r.Ingredients = append(r.Ingredients, txt)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
// Instructions
|
||||
_ = doc.ForEach("li.instruction", func(n extractor.Node) error {
|
||||
txt, _ := n.Text()
|
||||
txt = strings.TrimSpace(txt)
|
||||
if txt != "" {
|
||||
r.Instructions = append(r.Instructions, txt)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
return &r
|
||||
}
|
||||
306
sites/recipe/recipe_test.go
Normal file
306
sites/recipe/recipe_test.go
Normal file
@@ -0,0 +1,306 @@
|
||||
package recipe
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
||||
"gitea.stevedudenhoeffer.com/steve/go-extractor/extractortest"
|
||||
)
|
||||
|
||||
// JSON-LD fixtures covering the three document layouts the parser accepts.
const (
	// sampleJSONLD is a document that is itself the Recipe node.
	sampleJSONLD = `{
	"@type": "Recipe",
	"name": "Chocolate Chip Cookies",
	"description": "The best chocolate chip cookies ever.",
	"author": {"@type": "Person", "name": "Jane Smith"},
	"prepTime": "PT15M",
	"cookTime": "PT10M",
	"totalTime": "PT25M",
	"recipeYield": "24 cookies",
	"recipeIngredient": [
		"2 cups flour",
		"1 cup sugar",
		"1 cup chocolate chips"
	],
	"recipeInstructions": [
		{"@type": "HowToStep", "text": "Preheat oven to 350F."},
		{"@type": "HowToStep", "text": "Mix dry ingredients."},
		{"@type": "HowToStep", "text": "Bake for 10 minutes."}
	],
	"image": "https://example.com/cookies.jpg",
	"nutrition": {"calories": "250 calories"},
	"aggregateRating": {"ratingValue": "4.8"}
}`

	// sampleGraphJSONLD nests the Recipe node inside an "@graph" array.
	sampleGraphJSONLD = `{
	"@graph": [
		{"@type": "WebPage", "name": "Recipe Page"},
		{
			"@type": "Recipe",
			"name": "Banana Bread",
			"author": "Bob Baker",
			"recipeIngredient": ["3 bananas", "2 cups flour"],
			"recipeInstructions": "Mix and bake at 350F for 60 minutes."
		}
	]
}`

	// sampleArrayJSONLD is a top-level array that contains the Recipe node.
	sampleArrayJSONLD = `[
	{"@type": "WebSite", "name": "Cooking Blog"},
	{
		"@type": "Recipe",
		"name": "Pancakes",
		"recipeYield": ["4 servings"],
		"recipeIngredient": ["1 cup flour", "1 egg", "1 cup milk"],
		"image": ["https://example.com/pancakes.jpg"]
	}
]`
)
|
||||
|
||||
func makeRecipeDoc(jsonLD string) *extractortest.MockDocument {
|
||||
return &extractortest.MockDocument{
|
||||
URLValue: "https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/",
|
||||
MockNode: extractortest.MockNode{
|
||||
Children: map[string]extractor.Nodes{
|
||||
"script[type='application/ld+json']": {
|
||||
&extractortest.MockNode{TextValue: jsonLD},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractRecipeFromJSONLD(t *testing.T) {
|
||||
doc := makeRecipeDoc(sampleJSONLD)
|
||||
|
||||
r, err := extractRecipeFromJSONLD(doc)
|
||||
if err != nil {
|
||||
t.Fatalf("extractRecipeFromJSONLD() error: %v", err)
|
||||
}
|
||||
|
||||
if r.Name != "Chocolate Chip Cookies" {
|
||||
t.Errorf("Name = %q, want %q", r.Name, "Chocolate Chip Cookies")
|
||||
}
|
||||
if r.Author != "Jane Smith" {
|
||||
t.Errorf("Author = %q, want %q", r.Author, "Jane Smith")
|
||||
}
|
||||
if r.PrepTime != "15 min" {
|
||||
t.Errorf("PrepTime = %q, want %q", r.PrepTime, "15 min")
|
||||
}
|
||||
if r.CookTime != "10 min" {
|
||||
t.Errorf("CookTime = %q, want %q", r.CookTime, "10 min")
|
||||
}
|
||||
if r.TotalTime != "25 min" {
|
||||
t.Errorf("TotalTime = %q, want %q", r.TotalTime, "25 min")
|
||||
}
|
||||
if r.Yield != "24 cookies" {
|
||||
t.Errorf("Yield = %q, want %q", r.Yield, "24 cookies")
|
||||
}
|
||||
if len(r.Ingredients) != 3 {
|
||||
t.Fatalf("len(Ingredients) = %d, want 3", len(r.Ingredients))
|
||||
}
|
||||
if r.Ingredients[0] != "2 cups flour" {
|
||||
t.Errorf("Ingredients[0] = %q, want %q", r.Ingredients[0], "2 cups flour")
|
||||
}
|
||||
if len(r.Instructions) != 3 {
|
||||
t.Fatalf("len(Instructions) = %d, want 3", len(r.Instructions))
|
||||
}
|
||||
if r.Instructions[0] != "Preheat oven to 350F." {
|
||||
t.Errorf("Instructions[0] = %q, want %q", r.Instructions[0], "Preheat oven to 350F.")
|
||||
}
|
||||
if r.ImageURL != "https://example.com/cookies.jpg" {
|
||||
t.Errorf("ImageURL = %q, want %q", r.ImageURL, "https://example.com/cookies.jpg")
|
||||
}
|
||||
if r.Calories != "250 calories" {
|
||||
t.Errorf("Calories = %q, want %q", r.Calories, "250 calories")
|
||||
}
|
||||
if r.Rating != 4.8 {
|
||||
t.Errorf("Rating = %v, want 4.8", r.Rating)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractRecipeFromJSONLD_Graph(t *testing.T) {
|
||||
doc := makeRecipeDoc(sampleGraphJSONLD)
|
||||
|
||||
r, err := extractRecipeFromJSONLD(doc)
|
||||
if err != nil {
|
||||
t.Fatalf("extractRecipeFromJSONLD() error: %v", err)
|
||||
}
|
||||
|
||||
if r.Name != "Banana Bread" {
|
||||
t.Errorf("Name = %q, want %q", r.Name, "Banana Bread")
|
||||
}
|
||||
if r.Author != "Bob Baker" {
|
||||
t.Errorf("Author = %q, want %q", r.Author, "Bob Baker")
|
||||
}
|
||||
if len(r.Ingredients) != 2 {
|
||||
t.Fatalf("len(Ingredients) = %d, want 2", len(r.Ingredients))
|
||||
}
|
||||
if len(r.Instructions) != 1 {
|
||||
t.Fatalf("len(Instructions) = %d, want 1", len(r.Instructions))
|
||||
}
|
||||
if r.Instructions[0] != "Mix and bake at 350F for 60 minutes." {
|
||||
t.Errorf("Instructions[0] = %q, want %q", r.Instructions[0], "Mix and bake at 350F for 60 minutes.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractRecipeFromJSONLD_Array(t *testing.T) {
|
||||
doc := makeRecipeDoc(sampleArrayJSONLD)
|
||||
|
||||
r, err := extractRecipeFromJSONLD(doc)
|
||||
if err != nil {
|
||||
t.Fatalf("extractRecipeFromJSONLD() error: %v", err)
|
||||
}
|
||||
|
||||
if r.Name != "Pancakes" {
|
||||
t.Errorf("Name = %q, want %q", r.Name, "Pancakes")
|
||||
}
|
||||
if r.Yield != "4 servings" {
|
||||
t.Errorf("Yield = %q, want %q", r.Yield, "4 servings")
|
||||
}
|
||||
if r.ImageURL != "https://example.com/pancakes.jpg" {
|
||||
t.Errorf("ImageURL = %q, want %q", r.ImageURL, "https://example.com/pancakes.jpg")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractRecipeFromJSONLD_NoRecipe(t *testing.T) {
|
||||
doc := makeRecipeDoc(`{"@type": "WebPage", "name": "Not a recipe"}`)
|
||||
|
||||
_, err := extractRecipeFromJSONLD(doc)
|
||||
if err == nil {
|
||||
t.Error("expected error for non-Recipe JSON-LD")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractRecipeFromDOM(t *testing.T) {
|
||||
doc := &extractortest.MockDocument{
|
||||
MockNode: extractortest.MockNode{
|
||||
Children: map[string]extractor.Nodes{
|
||||
"h1": {
|
||||
&extractortest.MockNode{TextValue: "Grandma's Apple Pie"},
|
||||
},
|
||||
"div.recipe-summary p": {
|
||||
&extractortest.MockNode{TextValue: "A classic apple pie recipe."},
|
||||
},
|
||||
"li.ingredient": {
|
||||
&extractortest.MockNode{TextValue: "6 apples"},
|
||||
&extractortest.MockNode{TextValue: "1 cup sugar"},
|
||||
&extractortest.MockNode{TextValue: "2 pie crusts"},
|
||||
},
|
||||
"li.instruction": {
|
||||
&extractortest.MockNode{TextValue: "Peel and slice apples."},
|
||||
&extractortest.MockNode{TextValue: "Fill pie crust and bake."},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
r := extractRecipeFromDOM(doc)
|
||||
|
||||
if r.Name != "Grandma's Apple Pie" {
|
||||
t.Errorf("Name = %q, want %q", r.Name, "Grandma's Apple Pie")
|
||||
}
|
||||
if r.Description != "A classic apple pie recipe." {
|
||||
t.Errorf("Description = %q, want %q", r.Description, "A classic apple pie recipe.")
|
||||
}
|
||||
if len(r.Ingredients) != 3 {
|
||||
t.Fatalf("len(Ingredients) = %d, want 3", len(r.Ingredients))
|
||||
}
|
||||
if len(r.Instructions) != 2 {
|
||||
t.Fatalf("len(Instructions) = %d, want 2", len(r.Instructions))
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractRecipe_MockBrowser(t *testing.T) {
|
||||
doc := makeRecipeDoc(sampleJSONLD)
|
||||
|
||||
browser := &extractortest.MockBrowser{
|
||||
Documents: map[string]*extractortest.MockDocument{
|
||||
"https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/": doc,
|
||||
},
|
||||
}
|
||||
|
||||
r, err := DefaultConfig.ExtractRecipe(
|
||||
context.Background(),
|
||||
browser,
|
||||
"https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/",
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractRecipe() error: %v", err)
|
||||
}
|
||||
|
||||
if r.Name != "Chocolate Chip Cookies" {
|
||||
t.Errorf("Name = %q, want %q", r.Name, "Chocolate Chip Cookies")
|
||||
}
|
||||
if r.SourceURL != "https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/" {
|
||||
t.Errorf("SourceURL = %q, want recipe URL", r.SourceURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractRecipe_FallbackToDOM(t *testing.T) {
|
||||
doc := &extractortest.MockDocument{
|
||||
URLValue: "https://example.com/recipe",
|
||||
MockNode: extractortest.MockNode{
|
||||
Children: map[string]extractor.Nodes{
|
||||
"h1": {
|
||||
&extractortest.MockNode{TextValue: "Simple Recipe"},
|
||||
},
|
||||
"li.ingredient": {
|
||||
&extractortest.MockNode{TextValue: "1 cup flour"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
browser := &extractortest.MockBrowser{
|
||||
Documents: map[string]*extractortest.MockDocument{
|
||||
"https://example.com/recipe": doc,
|
||||
},
|
||||
}
|
||||
|
||||
r, err := DefaultConfig.ExtractRecipe(context.Background(), browser, "https://example.com/recipe")
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractRecipe() error: %v", err)
|
||||
}
|
||||
|
||||
if r.Name != "Simple Recipe" {
|
||||
t.Errorf("Name = %q, want %q", r.Name, "Simple Recipe")
|
||||
}
|
||||
if len(r.Ingredients) != 1 {
|
||||
t.Fatalf("len(Ingredients) = %d, want 1", len(r.Ingredients))
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatDuration(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{"PT15M", "15 min"},
|
||||
{"PT1H30M", "1 hr 30 min"},
|
||||
{"PT10M", "10 min"},
|
||||
{"PT2H", "2 hr"},
|
||||
{"PT45S", "45 sec"},
|
||||
{"PT1H15M30S", "1 hr 15 min 30 sec"},
|
||||
{"", ""},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
got := formatDuration(tt.input)
|
||||
if got != tt.want {
|
||||
t.Errorf("formatDuration(%q) = %q, want %q", tt.input, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractRecipeFromDOM_Empty(t *testing.T) {
|
||||
doc := &extractortest.MockDocument{
|
||||
MockNode: extractortest.MockNode{
|
||||
Children: map[string]extractor.Nodes{},
|
||||
},
|
||||
}
|
||||
|
||||
r := extractRecipeFromDOM(doc)
|
||||
if r.Name != "" {
|
||||
t.Errorf("expected empty name, got %q", r.Name)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user