Add sites/recipe package with ExtractRecipe() that works on any recipe URL. Parses JSON-LD structured data (@type: Recipe) first, with DOM fallback. Handles @graph containers, arrays, HowToStep objects, ISO 8601 durations, and various author/yield/image formats. Closes #29 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
307 lines
8.4 KiB
Go
307 lines
8.4 KiB
Go
package recipe
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor"
|
|
"gitea.stevedudenhoeffer.com/steve/go-extractor/extractortest"
|
|
)
|
|
|
|
// sampleJSONLD is a JSON-LD fixture consisting of a single top-level object
// of @type Recipe. It carries a Person author object, ISO 8601 durations for
// the prep/cook/total times, a string yield, three ingredients, three
// HowToStep instructions, a string image URL, nutrition calories and an
// aggregate rating.
const sampleJSONLD = `{
	"@type": "Recipe",
	"name": "Chocolate Chip Cookies",
	"description": "The best chocolate chip cookies ever.",
	"author": {"@type": "Person", "name": "Jane Smith"},
	"prepTime": "PT15M",
	"cookTime": "PT10M",
	"totalTime": "PT25M",
	"recipeYield": "24 cookies",
	"recipeIngredient": [
		"2 cups flour",
		"1 cup sugar",
		"1 cup chocolate chips"
	],
	"recipeInstructions": [
		{"@type": "HowToStep", "text": "Preheat oven to 350F."},
		{"@type": "HowToStep", "text": "Mix dry ingredients."},
		{"@type": "HowToStep", "text": "Bake for 10 minutes."}
	],
	"image": "https://example.com/cookies.jpg",
	"nutrition": {"calories": "250 calories"},
	"aggregateRating": {"ratingValue": "4.8"}
}`
|
|
|
|
// sampleGraphJSONLD is a JSON-LD fixture whose Recipe is nested inside an
// @graph container next to a WebPage node. The author is a plain string and
// recipeInstructions is a single string rather than a list of steps.
const sampleGraphJSONLD = `{
	"@graph": [
		{"@type": "WebPage", "name": "Recipe Page"},
		{
			"@type": "Recipe",
			"name": "Banana Bread",
			"author": "Bob Baker",
			"recipeIngredient": ["3 bananas", "2 cups flour"],
			"recipeInstructions": "Mix and bake at 350F for 60 minutes."
		}
	]
}`
|
|
|
|
// sampleArrayJSONLD is a JSON-LD fixture whose top-level value is an array
// holding a WebSite node followed by a Recipe. The recipe's yield and image
// are both given as single-element arrays.
const sampleArrayJSONLD = `[
	{"@type": "WebSite", "name": "Cooking Blog"},
	{
		"@type": "Recipe",
		"name": "Pancakes",
		"recipeYield": ["4 servings"],
		"recipeIngredient": ["1 cup flour", "1 egg", "1 cup milk"],
		"image": ["https://example.com/pancakes.jpg"]
	}
]`
|
|
|
|
func makeRecipeDoc(jsonLD string) *extractortest.MockDocument {
|
|
return &extractortest.MockDocument{
|
|
URLValue: "https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/",
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"script[type='application/ld+json']": {
|
|
&extractortest.MockNode{TextValue: jsonLD},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
func TestExtractRecipeFromJSONLD(t *testing.T) {
|
|
doc := makeRecipeDoc(sampleJSONLD)
|
|
|
|
r, err := extractRecipeFromJSONLD(doc)
|
|
if err != nil {
|
|
t.Fatalf("extractRecipeFromJSONLD() error: %v", err)
|
|
}
|
|
|
|
if r.Name != "Chocolate Chip Cookies" {
|
|
t.Errorf("Name = %q, want %q", r.Name, "Chocolate Chip Cookies")
|
|
}
|
|
if r.Author != "Jane Smith" {
|
|
t.Errorf("Author = %q, want %q", r.Author, "Jane Smith")
|
|
}
|
|
if r.PrepTime != "15 min" {
|
|
t.Errorf("PrepTime = %q, want %q", r.PrepTime, "15 min")
|
|
}
|
|
if r.CookTime != "10 min" {
|
|
t.Errorf("CookTime = %q, want %q", r.CookTime, "10 min")
|
|
}
|
|
if r.TotalTime != "25 min" {
|
|
t.Errorf("TotalTime = %q, want %q", r.TotalTime, "25 min")
|
|
}
|
|
if r.Yield != "24 cookies" {
|
|
t.Errorf("Yield = %q, want %q", r.Yield, "24 cookies")
|
|
}
|
|
if len(r.Ingredients) != 3 {
|
|
t.Fatalf("len(Ingredients) = %d, want 3", len(r.Ingredients))
|
|
}
|
|
if r.Ingredients[0] != "2 cups flour" {
|
|
t.Errorf("Ingredients[0] = %q, want %q", r.Ingredients[0], "2 cups flour")
|
|
}
|
|
if len(r.Instructions) != 3 {
|
|
t.Fatalf("len(Instructions) = %d, want 3", len(r.Instructions))
|
|
}
|
|
if r.Instructions[0] != "Preheat oven to 350F." {
|
|
t.Errorf("Instructions[0] = %q, want %q", r.Instructions[0], "Preheat oven to 350F.")
|
|
}
|
|
if r.ImageURL != "https://example.com/cookies.jpg" {
|
|
t.Errorf("ImageURL = %q, want %q", r.ImageURL, "https://example.com/cookies.jpg")
|
|
}
|
|
if r.Calories != "250 calories" {
|
|
t.Errorf("Calories = %q, want %q", r.Calories, "250 calories")
|
|
}
|
|
if r.Rating != 4.8 {
|
|
t.Errorf("Rating = %v, want 4.8", r.Rating)
|
|
}
|
|
}
|
|
|
|
func TestExtractRecipeFromJSONLD_Graph(t *testing.T) {
|
|
doc := makeRecipeDoc(sampleGraphJSONLD)
|
|
|
|
r, err := extractRecipeFromJSONLD(doc)
|
|
if err != nil {
|
|
t.Fatalf("extractRecipeFromJSONLD() error: %v", err)
|
|
}
|
|
|
|
if r.Name != "Banana Bread" {
|
|
t.Errorf("Name = %q, want %q", r.Name, "Banana Bread")
|
|
}
|
|
if r.Author != "Bob Baker" {
|
|
t.Errorf("Author = %q, want %q", r.Author, "Bob Baker")
|
|
}
|
|
if len(r.Ingredients) != 2 {
|
|
t.Fatalf("len(Ingredients) = %d, want 2", len(r.Ingredients))
|
|
}
|
|
if len(r.Instructions) != 1 {
|
|
t.Fatalf("len(Instructions) = %d, want 1", len(r.Instructions))
|
|
}
|
|
if r.Instructions[0] != "Mix and bake at 350F for 60 minutes." {
|
|
t.Errorf("Instructions[0] = %q, want %q", r.Instructions[0], "Mix and bake at 350F for 60 minutes.")
|
|
}
|
|
}
|
|
|
|
func TestExtractRecipeFromJSONLD_Array(t *testing.T) {
|
|
doc := makeRecipeDoc(sampleArrayJSONLD)
|
|
|
|
r, err := extractRecipeFromJSONLD(doc)
|
|
if err != nil {
|
|
t.Fatalf("extractRecipeFromJSONLD() error: %v", err)
|
|
}
|
|
|
|
if r.Name != "Pancakes" {
|
|
t.Errorf("Name = %q, want %q", r.Name, "Pancakes")
|
|
}
|
|
if r.Yield != "4 servings" {
|
|
t.Errorf("Yield = %q, want %q", r.Yield, "4 servings")
|
|
}
|
|
if r.ImageURL != "https://example.com/pancakes.jpg" {
|
|
t.Errorf("ImageURL = %q, want %q", r.ImageURL, "https://example.com/pancakes.jpg")
|
|
}
|
|
}
|
|
|
|
func TestExtractRecipeFromJSONLD_NoRecipe(t *testing.T) {
|
|
doc := makeRecipeDoc(`{"@type": "WebPage", "name": "Not a recipe"}`)
|
|
|
|
_, err := extractRecipeFromJSONLD(doc)
|
|
if err == nil {
|
|
t.Error("expected error for non-Recipe JSON-LD")
|
|
}
|
|
}
|
|
|
|
func TestExtractRecipeFromDOM(t *testing.T) {
|
|
doc := &extractortest.MockDocument{
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"h1": {
|
|
&extractortest.MockNode{TextValue: "Grandma's Apple Pie"},
|
|
},
|
|
"div.recipe-summary p": {
|
|
&extractortest.MockNode{TextValue: "A classic apple pie recipe."},
|
|
},
|
|
"li.ingredient": {
|
|
&extractortest.MockNode{TextValue: "6 apples"},
|
|
&extractortest.MockNode{TextValue: "1 cup sugar"},
|
|
&extractortest.MockNode{TextValue: "2 pie crusts"},
|
|
},
|
|
"li.instruction": {
|
|
&extractortest.MockNode{TextValue: "Peel and slice apples."},
|
|
&extractortest.MockNode{TextValue: "Fill pie crust and bake."},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
r := extractRecipeFromDOM(doc)
|
|
|
|
if r.Name != "Grandma's Apple Pie" {
|
|
t.Errorf("Name = %q, want %q", r.Name, "Grandma's Apple Pie")
|
|
}
|
|
if r.Description != "A classic apple pie recipe." {
|
|
t.Errorf("Description = %q, want %q", r.Description, "A classic apple pie recipe.")
|
|
}
|
|
if len(r.Ingredients) != 3 {
|
|
t.Fatalf("len(Ingredients) = %d, want 3", len(r.Ingredients))
|
|
}
|
|
if len(r.Instructions) != 2 {
|
|
t.Fatalf("len(Instructions) = %d, want 2", len(r.Instructions))
|
|
}
|
|
}
|
|
|
|
func TestExtractRecipe_MockBrowser(t *testing.T) {
|
|
doc := makeRecipeDoc(sampleJSONLD)
|
|
|
|
browser := &extractortest.MockBrowser{
|
|
Documents: map[string]*extractortest.MockDocument{
|
|
"https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/": doc,
|
|
},
|
|
}
|
|
|
|
r, err := DefaultConfig.ExtractRecipe(
|
|
context.Background(),
|
|
browser,
|
|
"https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/",
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("ExtractRecipe() error: %v", err)
|
|
}
|
|
|
|
if r.Name != "Chocolate Chip Cookies" {
|
|
t.Errorf("Name = %q, want %q", r.Name, "Chocolate Chip Cookies")
|
|
}
|
|
if r.SourceURL != "https://www.allrecipes.com/recipe/10813/best-chocolate-chip-cookies/" {
|
|
t.Errorf("SourceURL = %q, want recipe URL", r.SourceURL)
|
|
}
|
|
}
|
|
|
|
func TestExtractRecipe_FallbackToDOM(t *testing.T) {
|
|
doc := &extractortest.MockDocument{
|
|
URLValue: "https://example.com/recipe",
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{
|
|
"h1": {
|
|
&extractortest.MockNode{TextValue: "Simple Recipe"},
|
|
},
|
|
"li.ingredient": {
|
|
&extractortest.MockNode{TextValue: "1 cup flour"},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
browser := &extractortest.MockBrowser{
|
|
Documents: map[string]*extractortest.MockDocument{
|
|
"https://example.com/recipe": doc,
|
|
},
|
|
}
|
|
|
|
r, err := DefaultConfig.ExtractRecipe(context.Background(), browser, "https://example.com/recipe")
|
|
if err != nil {
|
|
t.Fatalf("ExtractRecipe() error: %v", err)
|
|
}
|
|
|
|
if r.Name != "Simple Recipe" {
|
|
t.Errorf("Name = %q, want %q", r.Name, "Simple Recipe")
|
|
}
|
|
if len(r.Ingredients) != 1 {
|
|
t.Fatalf("len(Ingredients) = %d, want 1", len(r.Ingredients))
|
|
}
|
|
}
|
|
|
|
func TestFormatDuration(t *testing.T) {
|
|
tests := []struct {
|
|
input string
|
|
want string
|
|
}{
|
|
{"PT15M", "15 min"},
|
|
{"PT1H30M", "1 hr 30 min"},
|
|
{"PT10M", "10 min"},
|
|
{"PT2H", "2 hr"},
|
|
{"PT45S", "45 sec"},
|
|
{"PT1H15M30S", "1 hr 15 min 30 sec"},
|
|
{"", ""},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
got := formatDuration(tt.input)
|
|
if got != tt.want {
|
|
t.Errorf("formatDuration(%q) = %q, want %q", tt.input, got, tt.want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestExtractRecipeFromDOM_Empty(t *testing.T) {
|
|
doc := &extractortest.MockDocument{
|
|
MockNode: extractortest.MockNode{
|
|
Children: map[string]extractor.Nodes{},
|
|
},
|
|
}
|
|
|
|
r := extractRecipeFromDOM(doc)
|
|
if r.Name != "" {
|
|
t.Errorf("expected empty name, got %q", r.Name)
|
|
}
|
|
}
|