mirror of
https://github.com/by-jp/www.byjp.me.git
synced 2025-08-22 22:13:12 +01:00
Extract title & emoji from Omnivore
When an annotation begins with a `# ` heading, that first line is used as the post title, and any emoji preceding the first space of the heading are extracted into a separate `emoji` field.
This commit is contained in:
parent
796692cd5d
commit
716e206938
6 changed files with 1437 additions and 4 deletions
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
title: Slop is the new name for unwanted AI-generated content
|
||||
title: AI "Slop"
|
||||
date: "2024-05-09T16:03:02Z"
|
||||
emoji: "\U0001F4A9"
|
||||
bookmarkOf: https://simonwillison.net/2024/May/8/slop/
|
||||
references:
|
||||
bookmark:
|
||||
|
|
|
@ -16,6 +16,7 @@ require (
|
|||
github.com/kelseyhightower/envconfig v1.4.0
|
||||
github.com/mattn/go-mastodon v0.0.6
|
||||
github.com/mmcdole/gofeed v1.3.0
|
||||
github.com/stretchr/testify v1.8.2
|
||||
golang.org/x/image v0.15.0
|
||||
golang.org/x/net v0.21.0
|
||||
golang.org/x/text v0.14.0
|
||||
|
@ -82,7 +83,6 @@ require (
|
|||
github.com/smartystreets/assertions v1.2.0 // indirect
|
||||
github.com/smartystreets/goconvey v1.7.2 // indirect
|
||||
github.com/spaolacci/murmur3 v1.1.0 // indirect
|
||||
github.com/stretchr/testify v1.8.2 // indirect
|
||||
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 // indirect
|
||||
github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 // indirect
|
||||
github.com/whyrusleeping/cbor-gen v0.0.0-20230331140348-1f892b517e70 // indirect
|
||||
|
|
|
@ -15,6 +15,7 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/by-jp/www.byjp.me/tools/shared"
|
||||
"github.com/joho/godotenv"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
@ -98,8 +99,16 @@ func outputArticle(article Article, outputDir string) error {
|
|||
fm.Date = article.BookmarkDate.Format(time.RFC3339)
|
||||
}
|
||||
|
||||
article.Annotation = strings.TrimSpace(article.Annotation)
|
||||
|
||||
if len(fm.Title) == 0 {
|
||||
fm.Title = article.Title
|
||||
if strings.HasPrefix(article.Annotation, "# ") {
|
||||
parts := strings.SplitAfterN(article.Annotation, "\n", 2)
|
||||
article.Annotation = strings.TrimSpace(parts[1])
|
||||
fm.Emoji, fm.Title = shared.ExtractLeadingEmoji(parts[0][2:])
|
||||
} else {
|
||||
fm.Title = article.Title
|
||||
}
|
||||
}
|
||||
fm.BookmarkOf = article.OriginalURL
|
||||
fm.Tags = removeDupes(append(fm.Tags, article.Tags...))
|
||||
|
@ -126,7 +135,7 @@ func outputArticle(article Article, outputDir string) error {
|
|||
}
|
||||
|
||||
fmt.Fprint(hugoPost, "---\n")
|
||||
fmt.Fprintln(hugoPost, linkHashtags(strings.TrimSpace(article.Annotation), fm.Tags))
|
||||
fmt.Fprintln(hugoPost, linkHashtags(article.Annotation, fm.Tags))
|
||||
|
||||
if len(article.Highlights) > 0 {
|
||||
fmt.Fprint(hugoPost, "\n### Highlights\n")
|
||||
|
|
1320
tools/shared/emoji-data.txt
Normal file
1320
tools/shared/emoji-data.txt
Normal file
File diff suppressed because it is too large
Load diff
78
tools/shared/text.go
Normal file
78
tools/shared/text.go
Normal file
|
@ -0,0 +1,78 @@
|
|||
package shared
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"embed"
|
||||
"fmt"
|
||||
"math/big"
|
||||
"strings"
|
||||
)
|
||||
|
||||
//go:embed emoji-data.txt
|
||||
var edf embed.FS
|
||||
var emojiCodePoints map[rune]struct{}
|
||||
|
||||
func init() {
|
||||
f, err := edf.Open("emoji-data.txt")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
emojiCodePoints = make(map[rune]struct{})
|
||||
|
||||
scanner := bufio.NewScanner(f)
|
||||
scanner.Split(bufio.ScanLines)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if len(line) == 0 || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
|
||||
parts := strings.Split(line, ";")
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
codeToRune := func(str string) rune {
|
||||
cp := new(big.Int)
|
||||
cp.SetString(str, 16)
|
||||
return rune(cp.Int64())
|
||||
}
|
||||
|
||||
codepoints := strings.Split(strings.TrimSpace(parts[0]), "..")
|
||||
switch len(codepoints) {
|
||||
case 1:
|
||||
emojiCodePoints[codeToRune(codepoints[0])] = struct{}{}
|
||||
case 2:
|
||||
a := codeToRune(codepoints[0])
|
||||
b := codeToRune(codepoints[1])
|
||||
for i := a; i <= b; i++ {
|
||||
emojiCodePoints[i] = struct{}{}
|
||||
}
|
||||
default:
|
||||
panic("Unknown emoji-data.txt database")
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
// NB. emoji-data.txt needs to be up to date from https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
|
||||
func ExtractLeadingEmoji(str string) (string, string) {
|
||||
parts := strings.SplitN(str, " ", 2)
|
||||
if len(parts) == 1 {
|
||||
return "", str
|
||||
}
|
||||
|
||||
for _, c := range parts[0] {
|
||||
if _, ok := emojiCodePoints[c]; !ok {
|
||||
fmt.Printf("%c: %d\n", c, c)
|
||||
return "", str
|
||||
}
|
||||
}
|
||||
|
||||
return parts[0], strings.TrimSpace(parts[1])
|
||||
}
|
25
tools/shared/text_test.go
Normal file
25
tools/shared/text_test.go
Normal file
|
@ -0,0 +1,25 @@
|
|||
package shared_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/by-jp/www.byjp.me/tools/shared"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestExtractLeadingEmoji(t *testing.T) {
|
||||
cases := [][]string{
|
||||
{"No Emoji here", "", "No Emoji here"},
|
||||
{"A single character start", "", "A single character start"},
|
||||
{"😊 The first emoji", "😊", "The first emoji"},
|
||||
{"😊 \t Extra space", "😊", "Extra space"},
|
||||
{"🍋🟩 15.1 emoji", "🍋🟩", "15.1 emoji"},
|
||||
{"絵文字", "", "絵文字"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
emoji, text := shared.ExtractLeadingEmoji(c[0])
|
||||
assert.Equal(t, c[1], emoji)
|
||||
assert.Equal(t, c[2], text)
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue