mirror of
https://github.com/by-jp/www.byjp.me.git
synced 2025-08-09 01:35:56 +01:00
469 lines
11 KiB
Go
469 lines
11 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
_ "embed"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"path"
|
|
"regexp"
|
|
"slices"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/by-jp/www.byjp.me/tools/shared"
|
|
"github.com/joho/godotenv"
|
|
"gopkg.in/yaml.v2"
|
|
)
|
|
|
|
// gql holds the GraphQL search query text, embedded from query.gql at
// build time.
//go:embed query.gql
var gql string

// ignoreLabels are Omnivore labels that are used for personal triage
// and should not be turned into post tags.
var ignoreLabels = []string{
	"opinion-agree",
	"opinion-disagree",
	"interesting",
}
|
|
|
|
func main() {
|
|
if err := godotenv.Load(); err != nil {
|
|
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
apiKey, ok := os.LookupEnv("OMNIVORE_API_KEY")
|
|
if !ok || len(apiKey) == 0 {
|
|
fmt.Fprint(os.Stderr, "OMNIVORE_API_KEY is not set")
|
|
os.Exit(1)
|
|
}
|
|
|
|
rootDir := "./"
|
|
if len(os.Args) > 1 {
|
|
rootDir = os.Args[1]
|
|
}
|
|
|
|
outputDir := path.Join(rootDir, "content/bookmarks")
|
|
if !isDir(outputDir) {
|
|
fmt.Printf(
|
|
"Usage: %s [directory]\n [directory] should be the root of your hugo blog (current: %s)\n",
|
|
path.Base(os.Args[0]),
|
|
rootDir,
|
|
)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Make the GraphQL request
|
|
articles, err := omnivoreArticles(
|
|
"in:archive has:highlights sort:updated-des",
|
|
apiKey,
|
|
)
|
|
if err != nil {
|
|
fmt.Println("Failed retrieve articles:", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
for _, article := range articles {
|
|
if err := outputArticle(article, outputDir); err != nil {
|
|
fmt.Fprintf(os.Stderr, "Failed to output article: %v\n", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// isDir reports whether pathStr exists and is a directory.
//
// Any stat error (not just "does not exist") yields false: the old code
// only checked os.IsNotExist and then dereferenced a nil FileInfo on
// other errors (e.g. permission denied), panicking.
func isDir(pathStr string) bool {
	st, err := os.Stat(pathStr)
	if err != nil {
		return false
	}

	return st.IsDir()
}
|
|
|
|
var hashtags = regexp.MustCompile(`#\w+`)
|
|
|
|
func outputArticle(article Article, outputDir string) error {
|
|
artEmoji, artTitle := shared.ExtractLeadingEmoji(article.Title)
|
|
|
|
slug := kebab(artTitle)
|
|
articlePath := path.Join(outputDir, fmt.Sprintf("%s.md", slug))
|
|
dirArticlePath := path.Join(outputDir, slug, "index.md")
|
|
|
|
if _, err := os.Stat(dirArticlePath); err == nil {
|
|
articlePath = dirArticlePath
|
|
}
|
|
|
|
fm, body, _ := loadFrontmatter(articlePath)
|
|
|
|
hugoPost, err := os.Create(articlePath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if fm.Date == "" {
|
|
fm.Date = article.BookmarkDate.Format(time.RFC3339)
|
|
}
|
|
|
|
if body == "" {
|
|
body = strings.TrimSpace(article.Annotation)
|
|
}
|
|
|
|
if fm.Summary == "" {
|
|
var summary string
|
|
summary, body = shared.ExtractSummary(article.OriginalSummary, body)
|
|
fm.Summary = summary
|
|
}
|
|
|
|
if len(fm.Title) == 0 {
|
|
fm.Emoji, fm.Title = artEmoji, artTitle
|
|
}
|
|
|
|
fm.BookmarkOf = article.OriginalURL
|
|
fm.Tags = removeDupes(append(fm.Tags, article.Tags...))
|
|
|
|
if fm.References == nil {
|
|
fm.References = make(map[string]Ref)
|
|
}
|
|
ref := fm.References["bookmark"]
|
|
ref.URL = article.OriginalURL
|
|
ref.Type = "entry"
|
|
ref.Name = article.OriginalTitle
|
|
ref.Summary = article.OriginalSummary
|
|
ref.Author = article.OriginalAuthor
|
|
fm.References["bookmark"] = ref
|
|
|
|
if !article.PublishDate.IsZero() {
|
|
fm.PublishDate = article.PublishDate.Format(time.RFC3339)
|
|
}
|
|
|
|
fmt.Fprintln(hugoPost, "---")
|
|
|
|
if err := yaml.NewEncoder(hugoPost).Encode(fm); err != nil {
|
|
return err
|
|
}
|
|
|
|
fmt.Fprint(hugoPost, "---\n")
|
|
fmt.Fprintln(hugoPost, linkHashtags(body, fm.Tags))
|
|
|
|
if len(article.Highlights) > 0 {
|
|
fmt.Fprint(hugoPost, "\n### Highlights\n")
|
|
}
|
|
|
|
for i, highlight := range article.Highlights {
|
|
quote := "> " + strings.ReplaceAll(trimQuote(highlight.Quote), "\n", "\n> ")
|
|
fmt.Fprint(hugoPost, "\n"+quote+"\n")
|
|
|
|
if highlight.Comment != "" {
|
|
fmt.Fprint(hugoPost, "\n"+linkHashtags(highlight.Comment, fm.Tags)+"\n")
|
|
}
|
|
|
|
if i < len(article.Highlights)-1 {
|
|
fmt.Fprint(hugoPost, "\n---\n")
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// allBold matches a Markdown **bold** span together with the optional
// following non-word character, so the markers can be stripped.
var allBold = regexp.MustCompile(`\*\*([^*]+)\*\*(\W)?`)

// removeDupes returns tags sorted with adjacent duplicates removed.
// Note: the input slice is sorted in place.
func removeDupes(tags []string) []string {
	slices.Sort(tags)
	return slices.Compact(tags)
}

// trimQuote cleans a highlight quote: trailing newlines/spaces are
// dropped and Markdown bold markers are removed.
func trimQuote(quote string) string {
	trimmed := strings.TrimRight(quote, "\n ")
	return allBold.ReplaceAllString(trimmed, "$1$2")
}
|
|
|
|
func loadFrontmatter(path string) (FrontMatter, string, error) {
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return FrontMatter{}, "", err
|
|
}
|
|
defer f.Close()
|
|
|
|
decoder := yaml.NewDecoder(f)
|
|
var fm FrontMatter
|
|
if err := decoder.Decode(&fm); err != nil {
|
|
return FrontMatter{}, "", err
|
|
}
|
|
|
|
rest, err := io.ReadAll(f)
|
|
return fm, string(rest), err
|
|
}
|
|
|
|
func linkHashtags(text string, tags []string) string {
|
|
return hashtags.ReplaceAllStringFunc(text, func(hashtag string) string {
|
|
tags = append(tags, hashtag[1:])
|
|
return fmt.Sprintf("[%s](/tags/%s)", hashtag[1:], strings.ToLower(hashtag[1:]))
|
|
})
|
|
}
|
|
|
|
// kebaber matches runs of characters that are not ASCII letters or
// digits.
var kebaber = regexp.MustCompile(`[^a-zA-Z0-9]+`)

// kebab lower-cases str and collapses every run of non-alphanumeric
// characters into a single hyphen, producing a URL-friendly slug.
func kebab(str string) string {
	lowered := strings.ToLower(str)
	return kebaber.ReplaceAllString(lowered, "-")
}
|
|
|
|
// GraphQLRequest is the JSON body of a GraphQL POST request.
type GraphQLRequest struct {
	Query     string                 `json:"query"`
	Variables map[string]interface{} `json:"variables"`
}

// omnivoreEndpoint is the Omnivore production GraphQL API URL.
const omnivoreEndpoint = "https://api-prod.omnivore.app/api/graphql"
|
|
|
|
// Article is the normalized form of an Omnivore bookmark used to build
// a Hugo post.
type Article struct {
	ID              string             // Omnivore item ID
	Title           string             // post title (overridden by an "# " heading in the annotation)
	Emoji           string             // NOTE(review): never assigned in this file — confirm intended use
	BookmarkDate    time.Time          // when the item was read/archived (from ReadAt)
	PublishDate     time.Time          // original publication date; zero when unknown/unparseable
	OriginalTitle   string             // abbreviated title of the source article
	OriginalURL     string             // source URL with tracking parameters stripped
	OriginalSummary string             // source description
	OriginalAuthor  string             // source author
	Annotation      string             // the personal NOTE highlight; becomes the post body
	Highlights      []ArticleHighlight // quote highlights, sorted by Position
	Tags            []string           // label names, minus ignoreLabels
}
|
|
|
|
// ArticleHighlight is one quoted highlight and its optional comment.
type ArticleHighlight struct {
	Quote    string  // highlighted text from the article
	Comment  string  // annotation attached to this highlight, if any
	Position float64 // relative position within the article; used for ordering
}
|
|
|
|
// FrontMatter is the YAML frontmatter written to (and re-read from)
// each Hugo bookmark post.
type FrontMatter struct {
	Title       string
	Date        string // RFC3339 bookmark date
	Emoji       string `yaml:"emoji,omitempty"`
	PublishDate string `yaml:"publishDate,omitempty"` // RFC3339 original publish date
	BookmarkOf  string `yaml:"bookmarkOf"` // URL of the bookmarked page
	References  map[string]Ref // named references; "bookmark" holds the source entry
	Summary     string   `yaml:"summary,omitempty"`
	Tags        []string `yaml:"tags,omitempty"`
}
|
|
|
|
// Ref describes one referenced page in the frontmatter References map.
type Ref struct {
	URL     string `yaml:"url"`
	Rel     string `yaml:"rel,omitempty"`  // NOTE(review): never set in this file — presumably used elsewhere
	Type    string `yaml:"type,omitempty"` // e.g. "entry"
	Name    string `yaml:"name"`
	Summary string `yaml:"summary,omitempty"`
	Author  string `yaml:"author,omitempty"`
}
|
|
|
|
// SearchResults mirrors the subset of the Omnivore GraphQL search
// response that this tool consumes.
type SearchResults struct {
	Data struct {
		Search struct {
			Edges []struct {
				Node SearchResult
			}
			// PageInfo drives pagination in omnivoreArticles.
			PageInfo struct {
				HasNextPage bool   `json:"hasNextPage"`
				EndCursor   string `json:"endCursor"`
			}
		}
	}
}
|
|
|
|
// SearchResult is one search edge node: a saved Omnivore item.
type SearchResult struct {
	ID                 string `json:"id"`
	Title              string `json:"title"`
	OriginalArticleURL string `json:"originalArticleUrl"`
	Author             string `json:"author"`
	PublishedAt        string `json:"publishedAt"` // parsed later as RFC3339
	ReadAt             string `json:"readAt"`      // parsed later as RFC3339
	Description        string `json:"description"`
	Highlights         []Highlight
	Labels             []struct {
		Name string `json:"name"`
	} `json:"labels"`
}
|
|
|
|
// Highlight is a raw highlight from the API. Type distinguishes the
// personal "NOTE" annotation from ordinary quote highlights (see
// parseResponse).
type Highlight struct {
	Type       string  `json:"type"`
	Position   float64 `json:"highlightPositionPercent"`
	Annotation string  `json:"annotation"`
	Quote      string  `json:"quote"`
}
|
|
|
|
func omnivoreArticles(query string, apiKey string) ([]Article, error) {
|
|
cursor := ""
|
|
var articles []Article
|
|
for {
|
|
newArticles, nextCursor, err := omnivoreRequest(query, apiKey, cursor)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
articles = append(articles, newArticles...)
|
|
if len(nextCursor) == 0 {
|
|
break
|
|
}
|
|
cursor = nextCursor
|
|
}
|
|
return articles, nil
|
|
}
|
|
|
|
func omnivoreRequest(query, apiKey, cursor string) ([]Article, string, error) {
|
|
request := GraphQLRequest{
|
|
Query: gql,
|
|
Variables: map[string]interface{}{
|
|
"query": query,
|
|
"after": cursor,
|
|
},
|
|
}
|
|
|
|
requestJSON, err := json.Marshal(request)
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
|
|
req, err := http.NewRequest("POST", omnivoreEndpoint, bytes.NewBuffer(requestJSON))
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
|
|
req.Header.Set("Authorization", apiKey)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
client := &http.Client{}
|
|
res, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
defer res.Body.Close()
|
|
|
|
if res.StatusCode != http.StatusOK {
|
|
return nil, "", fmt.Errorf("unexpected status code: %d", res.StatusCode)
|
|
}
|
|
|
|
body, err := io.ReadAll(res.Body)
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
|
|
return parseResponse(body)
|
|
}
|
|
|
|
var titleSplitter = regexp.MustCompile(`\s[-–—|]\s`)
|
|
|
|
func parseResponse(body []byte) ([]Article, string, error) {
|
|
var searchResults SearchResults
|
|
if err := json.Unmarshal(body, &searchResults); err != nil {
|
|
return nil, "", err
|
|
}
|
|
|
|
var articles []Article
|
|
for _, edge := range searchResults.Data.Search.Edges {
|
|
sr := edge.Node
|
|
|
|
articleURL := stripMarketing(sr.OriginalArticleURL)
|
|
|
|
var highlights []ArticleHighlight
|
|
var annotation string
|
|
for _, highlight := range sr.Highlights {
|
|
if highlight.Type == "NOTE" {
|
|
annotation = highlight.Annotation
|
|
} else {
|
|
highlights = append(highlights, ArticleHighlight{
|
|
Quote: highlight.Quote,
|
|
Comment: highlight.Annotation,
|
|
Position: highlight.Position,
|
|
})
|
|
}
|
|
}
|
|
|
|
sort.Sort(ByPosition(highlights))
|
|
|
|
if len(annotation) == 0 {
|
|
continue
|
|
}
|
|
|
|
bookmarked, err := time.Parse(time.RFC3339, sr.ReadAt)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "Failed to parse ReadAt date: %s\n", sr.ID)
|
|
continue
|
|
}
|
|
published, err := time.Parse(time.RFC3339, sr.PublishedAt)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "Failed to parse PublishedAt date (for %s): %s\n", articleURL, sr.ID)
|
|
}
|
|
|
|
abbreviatedOriginalTitle := sr.Title
|
|
if parts := titleSplitter.Split(abbreviatedOriginalTitle, -1); len(parts) > 1 {
|
|
abbreviatedOriginalTitle = parts[0]
|
|
for _, part := range parts[1:] {
|
|
if len(part) > len(abbreviatedOriginalTitle) {
|
|
abbreviatedOriginalTitle = part
|
|
}
|
|
}
|
|
}
|
|
title := abbreviatedOriginalTitle
|
|
if annotation[0:2] == "# " {
|
|
parts := strings.SplitN(annotation, "\n", 2)
|
|
title = parts[0][2:]
|
|
annotation = parts[1]
|
|
}
|
|
|
|
article := Article{
|
|
ID: sr.ID,
|
|
Title: title,
|
|
OriginalTitle: abbreviatedOriginalTitle,
|
|
OriginalURL: articleURL,
|
|
OriginalAuthor: sr.Author,
|
|
OriginalSummary: sr.Description,
|
|
BookmarkDate: bookmarked,
|
|
PublishDate: published,
|
|
Highlights: highlights,
|
|
Annotation: annotation,
|
|
}
|
|
|
|
for _, label := range sr.Labels {
|
|
if slices.Contains(ignoreLabels, label.Name) {
|
|
continue
|
|
}
|
|
article.Tags = append(article.Tags, label.Name)
|
|
}
|
|
|
|
articles = append(articles, article)
|
|
}
|
|
|
|
var cursor string
|
|
if searchResults.Data.Search.PageInfo.HasNextPage {
|
|
cursor = searchResults.Data.Search.PageInfo.EndCursor
|
|
}
|
|
|
|
return articles, cursor, nil
|
|
}
|
|
|
|
// stripMarketing removes known marketing/analytics query parameters
// (amp and the utm_* family) from rawURL. Unparseable URLs are
// returned unchanged after a warning on stderr. Note that remaining
// query parameters are re-encoded in sorted-key order.
func stripMarketing(rawURL string) string {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to parse URL: %s\n", rawURL)
		return rawURL
	}

	query := parsed.Query()
	for _, param := range []string{
		"amp",
		"utm_source",
		"utm_medium",
		"utm_campaign",
		"utm_content",
		"utm_term",
	} {
		query.Del(param)
	}
	parsed.RawQuery = query.Encode()

	return parsed.String()
}
|
|
|
|
type ByPosition []ArticleHighlight
|
|
|
|
func (p ByPosition) Len() int { return len(p) }
|
|
func (p ByPosition) Less(i, j int) bool { return p[i].Position < p[j].Position }
|
|
func (p ByPosition) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
|