Files
go-extractor/sites/recipe/recipe_test.go
Steve Dudenhoeffer de0a065923
All checks were successful
CI / build (pull_request) Successful in 57s
CI / vet (pull_request) Successful in 1m2s
CI / test (pull_request) Successful in 1m5s
feature: add recipe extractor with JSON-LD and DOM parsing
Add sites/recipe package with ExtractRecipe() that works on any recipe
URL. Parses JSON-LD structured data (@type: Recipe) first, with DOM
fallback. Handles @graph containers, arrays, HowToStep objects, ISO
8601 durations, and various author/yield/image formats.

Closes #29

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 16:52:28 +00:00

307 lines
8.4 KiB
Go

package recipe
import (
"context"
"testing"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/extractortest"
)
// sampleJSONLD is a single top-level JSON-LD object of @type Recipe. It uses
// the object form of "author" (a Person with a name), ISO 8601 duration
// strings for the prep/cook/total times, HowToStep objects inside
// "recipeInstructions", a plain-string "image", and nested "nutrition" and
// "aggregateRating" objects whose values are strings.
const sampleJSONLD = `{
"@type": "Recipe",
"name": "Chocolate Chip Cookies",
"description": "The best chocolate chip cookies ever.",
"author": {"@type": "Person", "name": "Jane Smith"},
"prepTime": "PT15M",
"cookTime": "PT10M",
"totalTime": "PT25M",
"recipeYield": "24 cookies",
"recipeIngredient": [
"2 cups flour",
"1 cup sugar",
"1 cup chocolate chips"
],
"recipeInstructions": [
{"@type": "HowToStep", "text": "Preheat oven to 350F."},
{"@type": "HowToStep", "text": "Mix dry ingredients."},
{"@type": "HowToStep", "text": "Bake for 10 minutes."}
],
"image": "https://example.com/cookies.jpg",
"nutrition": {"calories": "250 calories"},
"aggregateRating": {"ratingValue": "4.8"}
}`
// sampleGraphJSONLD wraps its nodes in an "@graph" array: a WebPage node
// followed by the Recipe node. Here "author" is a plain string and
// "recipeInstructions" is a single string rather than a list of steps.
const sampleGraphJSONLD = `{
"@graph": [
{"@type": "WebPage", "name": "Recipe Page"},
{
"@type": "Recipe",
"name": "Banana Bread",
"author": "Bob Baker",
"recipeIngredient": ["3 bananas", "2 cups flour"],
"recipeInstructions": "Mix and bake at 350F for 60 minutes."
}
]
}`
// sampleArrayJSONLD is a top-level JSON array holding a WebSite node and a
// Recipe node. In the Recipe, "recipeYield" and "image" are single-element
// arrays instead of plain strings.
const sampleArrayJSONLD = `[
{"@type": "WebSite", "name": "Cooking Blog"},
{
"@type": "Recipe",
"name": "Pancakes",
"recipeYield": ["4 servings"],
"recipeIngredient": ["1 cup flour", "1 egg", "1 cup milk"],
"image": ["https://example.com/pancakes.jpg"]
}
]`
// makeRecipeDoc returns a mock document whose only registered selector is the
// JSON-LD script tag; the single node under that selector carries jsonLD as
// its text. The document URL is fixed to a sample recipe page address.
func makeRecipeDoc(jsonLD string) *extractortest.MockDocument {
	ldScript := &extractortest.MockNode{TextValue: jsonLD}

	bySelector := map[string]extractor.Nodes{
		"script[type='application/ld+json']": {ldScript},
	}

	return &extractortest.MockDocument{
		URLValue: "https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/",
		MockNode: extractortest.MockNode{Children: bySelector},
	}
}
func TestExtractRecipeFromJSONLD(t *testing.T) {
doc := makeRecipeDoc(sampleJSONLD)
r, err := extractRecipeFromJSONLD(doc)
if err != nil {
t.Fatalf("extractRecipeFromJSONLD() error: %v", err)
}
if r.Name != "Chocolate Chip Cookies" {
t.Errorf("Name = %q, want %q", r.Name, "Chocolate Chip Cookies")
}
if r.Author != "Jane Smith" {
t.Errorf("Author = %q, want %q", r.Author, "Jane Smith")
}
if r.PrepTime != "15 min" {
t.Errorf("PrepTime = %q, want %q", r.PrepTime, "15 min")
}
if r.CookTime != "10 min" {
t.Errorf("CookTime = %q, want %q", r.CookTime, "10 min")
}
if r.TotalTime != "25 min" {
t.Errorf("TotalTime = %q, want %q", r.TotalTime, "25 min")
}
if r.Yield != "24 cookies" {
t.Errorf("Yield = %q, want %q", r.Yield, "24 cookies")
}
if len(r.Ingredients) != 3 {
t.Fatalf("len(Ingredients) = %d, want 3", len(r.Ingredients))
}
if r.Ingredients[0] != "2 cups flour" {
t.Errorf("Ingredients[0] = %q, want %q", r.Ingredients[0], "2 cups flour")
}
if len(r.Instructions) != 3 {
t.Fatalf("len(Instructions) = %d, want 3", len(r.Instructions))
}
if r.Instructions[0] != "Preheat oven to 350F." {
t.Errorf("Instructions[0] = %q, want %q", r.Instructions[0], "Preheat oven to 350F.")
}
if r.ImageURL != "https://example.com/cookies.jpg" {
t.Errorf("ImageURL = %q, want %q", r.ImageURL, "https://example.com/cookies.jpg")
}
if r.Calories != "250 calories" {
t.Errorf("Calories = %q, want %q", r.Calories, "250 calories")
}
if r.Rating != 4.8 {
t.Errorf("Rating = %v, want 4.8", r.Rating)
}
}
func TestExtractRecipeFromJSONLD_Graph(t *testing.T) {
doc := makeRecipeDoc(sampleGraphJSONLD)
r, err := extractRecipeFromJSONLD(doc)
if err != nil {
t.Fatalf("extractRecipeFromJSONLD() error: %v", err)
}
if r.Name != "Banana Bread" {
t.Errorf("Name = %q, want %q", r.Name, "Banana Bread")
}
if r.Author != "Bob Baker" {
t.Errorf("Author = %q, want %q", r.Author, "Bob Baker")
}
if len(r.Ingredients) != 2 {
t.Fatalf("len(Ingredients) = %d, want 2", len(r.Ingredients))
}
if len(r.Instructions) != 1 {
t.Fatalf("len(Instructions) = %d, want 1", len(r.Instructions))
}
if r.Instructions[0] != "Mix and bake at 350F for 60 minutes." {
t.Errorf("Instructions[0] = %q, want %q", r.Instructions[0], "Mix and bake at 350F for 60 minutes.")
}
}
func TestExtractRecipeFromJSONLD_Array(t *testing.T) {
doc := makeRecipeDoc(sampleArrayJSONLD)
r, err := extractRecipeFromJSONLD(doc)
if err != nil {
t.Fatalf("extractRecipeFromJSONLD() error: %v", err)
}
if r.Name != "Pancakes" {
t.Errorf("Name = %q, want %q", r.Name, "Pancakes")
}
if r.Yield != "4 servings" {
t.Errorf("Yield = %q, want %q", r.Yield, "4 servings")
}
if r.ImageURL != "https://example.com/pancakes.jpg" {
t.Errorf("ImageURL = %q, want %q", r.ImageURL, "https://example.com/pancakes.jpg")
}
}
// TestExtractRecipeFromJSONLD_NoRecipe expects an error when the only JSON-LD
// object on the page is a WebPage rather than a Recipe.
func TestExtractRecipeFromJSONLD_NoRecipe(t *testing.T) {
	notRecipe := makeRecipeDoc(`{"@type": "WebPage", "name": "Not a recipe"}`)

	if _, err := extractRecipeFromJSONLD(notRecipe); err == nil {
		t.Error("expected error for non-Recipe JSON-LD")
	}
}
// TestExtractRecipeFromDOM runs extractRecipeFromDOM against a mock document
// that has no JSON-LD, only nodes registered under the "h1",
// "div.recipe-summary p", "li.ingredient" and "li.instruction" selectors.
func TestExtractRecipeFromDOM(t *testing.T) {
	const (
		wantName        = "Grandma's Apple Pie"
		wantDescription = "A classic apple pie recipe."
	)

	// text wraps a string in a mock node to keep the fixture compact.
	text := func(s string) *extractortest.MockNode {
		return &extractortest.MockNode{TextValue: s}
	}

	bySelector := map[string]extractor.Nodes{
		"h1": {
			text("Grandma's Apple Pie"),
		},
		"div.recipe-summary p": {
			text("A classic apple pie recipe."),
		},
		"li.ingredient": {
			text("6 apples"),
			text("1 cup sugar"),
			text("2 pie crusts"),
		},
		"li.instruction": {
			text("Peel and slice apples."),
			text("Fill pie crust and bake."),
		},
	}
	doc := &extractortest.MockDocument{
		MockNode: extractortest.MockNode{Children: bySelector},
	}

	r := extractRecipeFromDOM(doc)

	if got := r.Name; got != wantName {
		t.Errorf("Name = %q, want %q", got, wantName)
	}
	if got := r.Description; got != wantDescription {
		t.Errorf("Description = %q, want %q", got, wantDescription)
	}
	if n := len(r.Ingredients); n != 3 {
		t.Fatalf("len(Ingredients) = %d, want 3", n)
	}
	if n := len(r.Instructions); n != 2 {
		t.Fatalf("len(Instructions) = %d, want 2", n)
	}
}
func TestExtractRecipe_MockBrowser(t *testing.T) {
doc := makeRecipeDoc(sampleJSONLD)
browser := &extractortest.MockBrowser{
Documents: map[string]*extractortest.MockDocument{
"https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/": doc,
},
}
r, err := DefaultConfig.ExtractRecipe(
context.Background(),
browser,
"https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/",
)
if err != nil {
t.Fatalf("ExtractRecipe() error: %v", err)
}
if r.Name != "Chocolate Chip Cookies" {
t.Errorf("Name = %q, want %q", r.Name, "Chocolate Chip Cookies")
}
if r.SourceURL != "https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/" {
t.Errorf("SourceURL = %q, want recipe URL", r.SourceURL)
}
}
func TestExtractRecipe_FallbackToDOM(t *testing.T) {
doc := &extractortest.MockDocument{
URLValue: "https://example.com/recipe",
MockNode: extractortest.MockNode{
Children: map[string]extractor.Nodes{
"h1": {
&extractortest.MockNode{TextValue: "Simple Recipe"},
},
"li.ingredient": {
&extractortest.MockNode{TextValue: "1 cup flour"},
},
},
},
}
browser := &extractortest.MockBrowser{
Documents: map[string]*extractortest.MockDocument{
"https://example.com/recipe": doc,
},
}
r, err := DefaultConfig.ExtractRecipe(context.Background(), browser, "https://example.com/recipe")
if err != nil {
t.Fatalf("ExtractRecipe() error: %v", err)
}
if r.Name != "Simple Recipe" {
t.Errorf("Name = %q, want %q", r.Name, "Simple Recipe")
}
if len(r.Ingredients) != 1 {
t.Fatalf("len(Ingredients) = %d, want 1", len(r.Ingredients))
}
}
// TestFormatDuration checks formatDuration against a table of ISO 8601
// duration strings and their expected human-readable forms, including the
// empty string, which is expected to map to the empty string.
func TestFormatDuration(t *testing.T) {
	type durationCase struct {
		input string
		want  string
	}

	cases := []durationCase{
		{"PT15M", "15 min"},
		{"PT1H30M", "1 hr 30 min"},
		{"PT10M", "10 min"},
		{"PT2H", "2 hr"},
		{"PT45S", "45 sec"},
		{"PT1H15M30S", "1 hr 15 min 30 sec"},
		{"", ""},
	}

	for i := range cases {
		tc := cases[i]
		if got := formatDuration(tc.input); got != tc.want {
			t.Errorf("formatDuration(%q) = %q, want %q", tc.input, got, tc.want)
		}
	}
}
func TestExtractRecipeFromDOM_Empty(t *testing.T) {
doc := &extractortest.MockDocument{
MockNode: extractortest.MockNode{
Children: map[string]extractor.Nodes{},
},
}
r := extractRecipeFromDOM(doc)
if r.Name != "" {
t.Errorf("expected empty name, got %q", r.Name)
}
}