www.byjp.me/tools/shared/text.go
2024-10-16 14:07:25 +01:00

94 lines
1.9 KiB
Go

package shared
import (
"bufio"
"embed"
"math/big"
"strings"
)
// edf is the embedded file system holding emoji-data.txt; init opens and
// reads that file when the package is loaded.
//
//go:embed emoji-data.txt
var edf embed.FS

// emojiCodePoints is the set of code points listed in emoji-data.txt. init
// allocates and fills it, and ExtractLeadingEmoji consults it.
var emojiCodePoints map[rune]struct{}
// init loads the embedded emoji-data.txt and records every code point it
// lists in emojiCodePoints.
//
// Each data line has the form "<code point or range> ; <property> ...". Only
// the first field is used: the property name is ignored, so code points from
// every property in the file end up in the set. Empty lines, lines starting
// with '#', and lines without a ';' are skipped.
//
// init panics if the file cannot be read, if a first field is neither a single
// code point nor an "a..b" range, or if a code point is not valid hexadecimal
// in the Unicode range. The data is compiled into the binary, so any of these
// indicates a broken build input rather than a runtime condition.
func init() {
	// Highest valid Unicode code point; larger values cannot be stored in a
	// rune without truncation.
	const maxCodePoint = 0x10FFFF

	// parseCodePoint converts a hexadecimal code point to a rune. The parse
	// result is checked so that a malformed field fails loudly instead of
	// silently adding a bogus entry to the set.
	parseCodePoint := func(str string) rune {
		cp, ok := new(big.Int).SetString(str, 16)
		if !ok || cp.Sign() < 0 || !cp.IsInt64() || cp.Int64() > maxCodePoint {
			panic("invalid code point in emoji-data.txt: " + str)
		}
		return rune(cp.Int64())
	}

	f, err := edf.Open("emoji-data.txt")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	emojiCodePoints = make(map[rune]struct{})

	scanner := bufio.NewScanner(f)
	scanner.Split(bufio.ScanLines)
	for scanner.Scan() {
		line := scanner.Text()
		if len(line) == 0 || strings.HasPrefix(line, "#") {
			continue
		}

		parts := strings.Split(line, ";")
		if len(parts) < 2 {
			continue
		}

		// The first field is either "XXXX" or an inclusive range "XXXX..YYYY".
		codepoints := strings.Split(strings.TrimSpace(parts[0]), "..")
		switch len(codepoints) {
		case 1:
			emojiCodePoints[parseCodePoint(codepoints[0])] = struct{}{}
		case 2:
			first := parseCodePoint(codepoints[0])
			last := parseCodePoint(codepoints[1])
			for r := first; r <= last; r++ {
				emojiCodePoints[r] = struct{}{}
			}
		default:
			panic("Unknown emoji-data.txt database")
		}
	}
	if err := scanner.Err(); err != nil {
		panic(err)
	}
}
// ExtractLeadingEmoji splits str at its first space and, when the text before
// that space is an emoji, returns that emoji and the whitespace-trimmed
// remainder. In every other case it returns "" and str unchanged.
//
// The prefix counts as an emoji when every rune in it is present in
// emojiCodePoints and at least one of them is non-ASCII. The non-ASCII
// requirement exists because Unicode's emoji-data.txt lists '#', '*' and the
// digits 0-9 (the bases of keycap sequences); without it a plain prefix such
// as "2024" or "#" would be reported as an emoji. Real keycap emoji still
// match because they also contain non-ASCII combining code points.
//
// A string containing no space is never split, even if it consists solely of
// an emoji.
//
// NB. emoji-data.txt needs to be up to date from https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
func ExtractLeadingEmoji(str string) (string, string) {
	const maxASCII = 0x7F

	sp := strings.Index(str, " ")
	if sp < 0 {
		return "", str
	}
	prefix, rest := str[:sp], str[sp+1:]

	hasNonASCII := false
	for _, c := range prefix {
		if _, ok := emojiCodePoints[c]; !ok {
			return "", str
		}
		if c > maxASCII {
			hasNonASCII = true
		}
	}
	// Also covers an empty prefix (str starts with a space): no emoji found.
	if !hasNonASCII {
		return "", str
	}

	return prefix, strings.TrimSpace(rest)
}
// maxSummaryLength is the longest summary, in characters (runes), that
// ExtractSummary will return.
const maxSummaryLength = 140

// ExtractSummary chooses a summary for a post and returns it together with the
// body that should accompany it.
//
// The first paragraph of inBody (the text before the first blank line, i.e.
// the first "\n\n") takes priority: if it is non-empty and at most
// maxSummaryLength characters long it becomes the summary, and the body
// becomes whatever follows the blank line. When inBody has only that one
// paragraph the body is returned unchanged, so the text appears as both
// summary and body.
//
// Otherwise inSummary is used if it is at most maxSummaryLength characters
// long, or "" if it is too long, and the body is returned unchanged.
//
// Lengths are measured in runes rather than bytes so that non-ASCII text is
// not rejected early. An empty first paragraph never replaces inSummary.
func ExtractSummary(inSummary, inBody string) (outSummary, outBody string) {
	outBody = inBody
	if len([]rune(inSummary)) <= maxSummaryLength {
		outSummary = inSummary
	}

	// Separate the first paragraph from the rest, remembering whether a
	// paragraph break was present at all.
	first, rest, hasRest := inBody, "", false
	if i := strings.Index(inBody, "\n\n"); i >= 0 {
		first, rest, hasRest = inBody[:i], inBody[i+len("\n\n"):], true
	}

	if first == "" || len([]rune(first)) > maxSummaryLength {
		return outSummary, outBody
	}

	outSummary = first
	if hasRest {
		outBody = rest
	}
	return outSummary, outBody
}