Files
go-extractor/sites/archive/cmd/archive/main.go
Steve Dudenhoeffer b4e462a6b4
All checks were successful
CI / vet (pull_request) Successful in 1m6s
CI / build (pull_request) Successful in 1m7s
CI / test (pull_request) Successful in 1m8s
fix: prevent panic on short article content in archive cmd
Add length check before slicing article.Content[:32], matching the
safe truncation pattern already used in cmd/browser/main.go.

Closes #9

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 16:14:32 +00:00

139 lines
2.7 KiB
Go

package main
import (
"context"
"fmt"
"os"
"time"
"github.com/urfave/cli/v3"
"gitea.stevedudenhoeffer.com/steve/go-extractor"
"gitea.stevedudenhoeffer.com/steve/go-extractor/cmd/browser/pkg/browser"
"gitea.stevedudenhoeffer.com/steve/go-extractor/sites/archive"
)
// ArchiveFlags is the set of command-line flags specific to the archive
// command. It carries the ToConfig method that turns parsed flag values into
// an archive.Config.
type ArchiveFlags []cli.Flag
// Flags declares the archive-specific flags: "endpoint" and "timeout".
// Both are string flags; ToConfig treats an empty value as "not set".
// NOTE(review): only DefaultText is set (no Value). DefaultText is presumably
// help-output text only, so the strings below should be confirmed to match
// archive.DefaultConfig.
var Flags = ArchiveFlags{
&cli.StringFlag{
Name: "endpoint",
Usage: "Archive endpoint to use",
DefaultText: "https://archive.ph",
},
// The timeout is kept as a string here and parsed with time.ParseDuration
// in ToConfig.
&cli.StringFlag{
Name: "timeout",
Usage: "Timeout for requests",
DefaultText: "10s",
},
}
// ToConfig builds an archive.Config from the parsed command-line flags.
//
// It starts from a copy of archive.DefaultConfig and overrides only the
// fields whose flag holds a non-empty value:
//   - "endpoint" replaces Config.Endpoint.
//   - "timeout" is parsed with time.ParseDuration and stored in Config.Timeout.
//
// The context argument is unused. If the timeout cannot be parsed, the
// partially built config is returned together with a wrapped error.
func (f ArchiveFlags) ToConfig(_ context.Context, cmd *cli.Command) (archive.Config, error) {
	cfg := archive.DefaultConfig

	endpoint := cmd.String("endpoint")
	if endpoint != "" {
		cfg.Endpoint = endpoint
	}

	// Nothing more to do when no timeout was supplied.
	rawTimeout := cmd.String("timeout")
	if rawTimeout == "" {
		return cfg, nil
	}

	timeout, parseErr := time.ParseDuration(rawTimeout)
	if parseErr != nil {
		return cfg, fmt.Errorf("invalid timeout duration: %w", parseErr)
	}
	cfg.Timeout = &timeout

	return cfg, nil
}
// main runs the "archive" command.
//
// The first positional argument is the URL to look up. The command checks
// whether the URL is already archived, requests a new archive when it is not,
// runs readability extraction on the resulting document, and prints the
// extracted article fields. Any error is written to stderr and the process
// exits with status 1.
func main() {
	// Register the browser flags together with the archive-specific ones.
	// The command handed to browser.FromCommand must carry the browser flags,
	// so the merged slice (not just Flags) is what gets installed below.
	var flags []cli.Flag
	flags = append(flags, browser.Flags...)
	flags = append(flags, Flags...)

	// NOTE(review): Flags.ToConfig is never called in this function, and
	// IsArchived/Archive are invoked as package-level functions without a
	// config, so --endpoint and --timeout appear to have no effect. Confirm
	// against the archive package API and wire the config through.
	app := &cli.Command{
		Name:  "archive",
		Usage: "Archive a website",
		Flags: flags,
		Action: func(ctx context.Context, cmd *cli.Command) error {
			target := cmd.Args().First()
			if target == "" {
				return fmt.Errorf("usage: archive <url>")
			}

			b, err := browser.FromCommand(ctx, cmd)
			if err != nil {
				return err
			}

			doc, err := archive.IsArchived(ctx, b, target)
			if err != nil {
				return err
			}

			// A nil document with a nil error means no archive exists yet,
			// so request one.
			if doc == nil {
				fmt.Println("Not archived")

				doc, err = archive.Archive(ctx, b, target)
				if err != nil {
					return err
				}
				if doc == nil {
					return fmt.Errorf("failed to archive")
				}
			}

			defer func(doc extractor.Document) {
				fmt.Println("Closing document", doc.URL())
				// A close failure is reported but does not change the
				// command's result.
				err := doc.Close()
				if err != nil {
					fmt.Println("failed to close document", err)
				}
			}(doc)

			fmt.Println("Archived at ", doc.URL())

			article, err := extractor.Readability(ctx, doc)
			if err != nil {
				return err
			}

			// Print only a short preview of the content. The cut is made on
			// a rune boundary so multi-byte UTF-8 text is never split in the
			// middle of a character; short or empty content is printed as-is.
			const previewRunes = 32
			content := article.Content
			seen := 0
			for i := range article.Content {
				if seen == previewRunes {
					content = article.Content[:i] + "..."
					break
				}
				seen++
			}

			fmt.Println("Title:", article.Title)
			fmt.Println("Byline:", article.Byline)
			fmt.Println("Site:", article.SiteName)
			fmt.Println("Published:", article.PublishedTime)
			fmt.Println("Excerpt:", article.Excerpt)
			fmt.Println("Length:", article.Length)
			fmt.Println("Lang:", article.Lang)
			fmt.Println("Content:", content)
			fmt.Println("TextContent:", article.TextContent)

			return nil
		},
	}

	if err := app.Run(context.Background(), os.Args); err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}
}