Prepare for Instagram posts

This commit is contained in:
JP Hastings-Spital 2023-04-29 10:54:25 +01:00
parent 9adafb2130
commit 791fd82f64
7 changed files with 378 additions and 2 deletions

View file

@ -0,0 +1,54 @@
{{ define "main" }}
<main class="post">
<article>
<div class="post-content">
{{- /* Render every media file listed in the post's front matter. The importer names videos with a .mp4 extension, and <img> cannot play those, so they get a <video> element instead. Other video extensions are not handled here. */}}
{{ range .Params.Media }}
{{ if strings.HasSuffix . ".mp4" }}
<figure><video src="{{ . }}" controls></video></figure>
{{ else }}
<figure><img src="{{ . }}" /></figure>
{{ end }}
{{ end }}
{{ .Content }}
</div>
</article>
<hr />
<div class="post-info">
<p>
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather"><rect x="2" y="2" width="20" height="20" rx="5" ry="5"></rect><path d="M16 11.37A4 4 0 1 1 12.63 8 4 4 0 0 1 16 11.37z"></path><line x1="17.5" y1="6.5" x2="17.5" y2="6.5"></line></svg>
From <a href="../">Instagram archive</a>
</p>
{{ partial "tags.html" .Params.tags }}
{{ partial "categories.html" . }}
{{- if .GitInfo }}
<p><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-git-commit"><circle cx="12" cy="12" r="4"></circle><line x1="1.05" y1="12" x2="7" y2="12"></line><line x1="17.01" y1="12" x2="22.96" y2="12"></line></svg><a href="{{ .Site.Params.gitUrl -}}{{ .GitInfo.Hash }}" target="_blank" rel="noopener">{{ .GitInfo.AbbreviatedHash }}</a> @ {{ if .Site.Params.dateformNum }}{{ dateFormat .Site.Params.dateformNum .GitInfo.AuthorDate.Local }}{{ else }}{{ dateFormat "2006-01-02" .GitInfo.AuthorDate.Local }}{{ end }}</p>
{{- end }}
<p>
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-calendar">
<rect x="3" y="4" width="18" height="18" rx="2" ry="2"></rect>
<line x1="16" y1="2" x2="16" y2="6"></line>
<line x1="8" y1="2" x2="8" y2="6"></line>
<line x1="3" y1="10" x2="21" y2="10"></line>
</svg>
{{ if .Site.Params.dateformNumTime }}
{{ dateFormat .Site.Params.dateformNumTime .Date.Local }}
{{ else }}
{{ dateFormat "2006-01-02 15:04" .Date.Local }}
{{ end }}
{{ if .Lastmod }}
{{ if not (eq .Lastmod .Date )}}
{{ if .Site.Params.dateformNumTime }}
({{ i18n "lastModified" }}: {{ dateFormat .Site.Params.dateformNumTime .Lastmod.Local }})
{{ else }}
({{ i18n "lastModified" }}: {{ dateFormat "2006-01-02 15:04" .Lastmod.Local }})
{{ end }}
{{ end }}
{{ end }}
</p>
{{ partial "pagination-single.html" . }}
</div>
</main>
{{ end }}

View file

@ -2,7 +2,7 @@
{{ $paginator := .Paginate .Data.Pages }}
<main class="posts">
<h1>{{ .Title }}</h1>
<h1>{{ title (replace .Title "-" " ") }}</h1>
{{ if .Content }}
<div class="content">{{ .Content }}</div>

View file

@ -41,7 +41,6 @@
<hr />
<div class="post-info">
{{ .Params.tags }}
{{ partial "tags.html" .Params.tags }}
{{ partial "categories.html" . }}

View file

@ -0,0 +1,9 @@
# Instagram archive
This tool will take an [Instagram data archive](https://help.instagram.com/181231772500920) and turn it into a series of timestamped posts for your Hugo blog.
```
go run . <path/to/archive/username_yyyymmdd.zip> <path/to/hugo/root/>
```
It will create one folder per post in the `content/instagram-posts` directory. You can change how those posts look by creating a `layouts/instagram-posts/single.html` template.

View file

@ -0,0 +1,8 @@
module github.com/jphastings/www.byjp.me/tools/archive/instagram
go 1.20
require (
golang.org/x/text v0.9.0
gopkg.in/yaml.v2 v2.4.0
)

View file

@ -0,0 +1,6 @@
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=

View file

@ -0,0 +1,300 @@
package main
import (
"archive/zip"
"crypto/md5"
"encoding/base32"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path"
"regexp"
"strings"
"time"
"golang.org/x/text/encoding/charmap"
"gopkg.in/yaml.v2"
)
// Byte limits used when a caption is shortened into a post title.
const (
	// titleLength is the longest caption that is used as a title unchanged.
	titleLength = 48
	// minLength is where a long caption is cut when no space is found
	// between minLength and titleLength to break on.
	minLength = 16
)
// tagMap maps an Instagram handle (including its leading "@") to the display
// name that replaces it in a post's text. postToPost links a mapped handle to
// /tags/<lowercased name> and adds that lowercased name to the post's tags.
// Lookups are exact, so keys are case-sensitive.
var tagMap = map[string]string{
"@andyrobert1729": "Andy",
"@artphilm": "Phil",
"@beckyfuzzymuzzy": "Becky",
"@bickertonjane": "Auntie Jane",
"@bratpack_ldn": "The Brat Pack",
"@buckettafloat": "Erica",
"@buckettsails": "Joe",
"@chrismhs": "Chris",
"@dxcompton": "Dave",
"@ericabuckett8": "Erica",
"@esther_dr": "Esther",
"@hazanj99": "Jenny",
"@hazanjon": "Jon",
"@helenhs16": "Mum",
"@joostsposts": "Joost",
"@kaphleenmurthy": "Kathleen",
"@le_boyd": "Leanne",
"@lydiadr": "Lydia",
"@Mr_Bingo": "Mr. Bingo",
"@ponkalulu": "Caitlin",
"@rosalysbryan": "Rose",
"@spagbol_terol": "Paul",
"@yvetteedrei": "Yvette",
}
// check is the program's fatal-error handler: when err is non-nil it writes
// msg and the error to stderr and terminates the process with exit status 1.
// A nil err is a no-op.
func check(err error, msg string) {
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "%s\n %v", msg, err)
	os.Exit(1)
}
// closer is anything that can be closed and may report an error when it is.
type closer interface{ Close() error }
// doClose closes c and hands the result to check, so a failed close is
// reported with msg and ends the program. It is written to be used in defer
// statements.
func doClose(c closer, msg string) {
	err := c.Close()
	check(err, msg)
}
// location is a named coordinate pair; frontMatter carries a list of them.
type location struct {
	Name                string
	Latitude, Longitude float64
}
// frontMatter is the metadata that postToPost encodes as YAML between the
// "---" fences at the top of each generated index.md.
type frontMatter struct {
// Title is the post's caption, shortened when it is long.
Title string
// Media lists the post's media file names (media-<index><ext>), which live
// in the same directory as index.md.
Media []string
// Date is the creation time of the post's first media item, in UTC and
// formatted as RFC 3339.
Date string
// Draft is never set in this file, so it is encoded as false.
Draft bool
// Tags holds the hashtags found in the caption (without "#") and the
// lowercased display names of handles found in tagMap.
Tags []string
// Locations is not populated anywhere in this file.
Locations []location
}
// post is one element of the JSON array that postsFromFile decodes from the
// archive's posts file.
type post struct {
// Media holds the post's images and videos; postToPost takes the date (and,
// when Title is empty, the caption) from the first entry.
Media []media
// Title is the post-level caption; it may be empty.
Title string
// CreationTimestamp is decoded from "creation_timestamp". It is not read
// elsewhere in this file except as part of the value hashed by postHash.
// NOTE(review): presumably Unix seconds, like media.CreationTimestamp — confirm.
CreationTimestamp int64 `json:"creation_timestamp"`
}
// media describes one image or video belonging to a post, as decoded from
// the archive's posts JSON.
type media struct {
	// URI is the file's path inside the archive; copyMedia matches it
	// against zip entry names.
	URI string
	// CreationTimestamp is read from "creation_timestamp" and interpreted
	// by postToPost as Unix seconds.
	CreationTimestamp int64 `json:"creation_timestamp"`
	// Title is the caption attached to this individual file.
	Title string
	// MediaMetadata keeps whatever extra metadata the JSON carries, undecoded.
	MediaMetadata map[string]any
}
// main converts the Instagram archive named by the first argument into Hugo
// posts under content/instagram-posts in the Hugo root named by the second
// argument, then copies the referenced media next to those posts.
//
// Any failure is reported on stderr and ends the program with exit status 1.
func main() {
	if len(os.Args) != 3 {
		fmt.Fprintf(os.Stderr, "Usage: %s <path/to/instagram/username_yyyymmdd.zip> <path/to/hugo>\n", os.Args[0])
		// Stop here: without exactly two arguments the os.Args lookups
		// below would panic with an index-out-of-range error.
		os.Exit(1)
	}

	archive := os.Args[1]
	hugo := os.Args[2]
	outputDir := path.Join(hugo, "content", "instagram-posts")

	zf, err := zip.OpenReader(archive)
	check(err, "Unable to open instagram archive")
	defer doClose(zf, "Unable to close zipfile")

	postCount, mediaMap, err := createPosts(zf, outputDir)
	check(err, "Unable to create hugo posts for your instagram data")

	// The archive's file list is simply walked a second time here; each
	// entry is opened independently, so nothing has to be rewound.
	check(copyMedia(zf, mediaMap), "Unable to copy media to your hugo blog")

	fmt.Printf("Success! %d Instagram posts (with %d images and videos) were added to your hugo blog.\n", postCount, len(mediaMap))
}
// postsFile matches the archive entries that hold post metadata:
// content/posts_<number>.json.
var postsFile = regexp.MustCompile(`\Acontent/posts_(\d+)\.json\z`)

// createPosts writes a Hugo post under outputDir for every post described by
// the archive's content/posts_<number>.json entries.
//
// The pattern accepts any number, so every matching entry is processed and
// the results are merged, rather than stopping after the first match.
//
// It returns the total number of posts written and a map from each media
// file's path inside the archive to the path it should be copied to. An error
// is returned when an entry cannot be opened or converted, or when the
// archive contains no posts file at all.
func createPosts(zf *zip.ReadCloser, outputDir string) (int, map[string]string, error) {
	var (
		total int
		found bool
	)
	allMedia := make(map[string]string)

	for _, f := range zf.File {
		if !postsFile.MatchString(f.Name) {
			continue
		}
		found = true

		count, mediaMap, err := postsFromArchiveEntry(f, outputDir)
		if err != nil {
			return 0, nil, err
		}
		total += count
		for src, dst := range mediaMap {
			allMedia[src] = dst
		}
	}

	if !found {
		return 0, nil, errors.New("no content/posts_1.json file found in zip file")
	}
	return total, allMedia, nil
}

// postsFromArchiveEntry opens one posts JSON entry and converts its posts.
// It is a separate function so the entry is closed as soon as it has been
// read instead of staying open until the whole archive has been processed.
func postsFromArchiveEntry(f *zip.File, outputDir string) (int, map[string]string, error) {
	jf, err := f.Open()
	if err != nil {
		return 0, nil, err
	}
	defer doClose(jf, "Unable to close posts file within archive")

	return postsFromFile(jf, outputDir)
}
// postsFromFile streams the JSON array of posts read from r and writes one
// Hugo post per element under outputDir.
//
// It returns the number of posts written and a map from each media file's
// path inside the archive to its destination path. When decoding or writing
// a post fails part-way through, the count and map gathered so far are
// returned together with the error.
func postsFromFile(r io.Reader, outputDir string) (int, map[string]string, error) {
	postsCount := 0
	mediaMap := make(map[string]string)

	dec := json.NewDecoder(r)

	// The document must be an array. Checking the token's type as well as
	// its value rejects a JSON string "[" that merely prints the same way.
	tok, err := dec.Token()
	if err != nil {
		return 0, nil, err
	}
	if delim, ok := tok.(json.Delim); !ok || delim != '[' {
		return 0, nil, errors.New("posts JSON doesn't start with '['")
	}

	// Decode one element at a time so the whole array is never held in memory.
	for dec.More() {
		var p post
		if err := dec.Decode(&p); err != nil {
			return postsCount, mediaMap, fmt.Errorf("unable to decode JSON: %w", err)
		}

		if err := postToPost(p, mediaMap, outputDir); err != nil {
			return postsCount, mediaMap, err
		}
		postsCount++
	}

	return postsCount, mediaMap, nil
}
// hashtag matches "#tag" and "@handle" tokens in a caption.
var hashtag = regexp.MustCompile(`[#@]\w+`)

// postToPost writes p as a Hugo page bundle: a directory under outputDir,
// named after the post's hash, containing an index.md with YAML front matter
// followed by the caption as Markdown.
//
// The media files themselves are not copied here. Instead, every media item
// is recorded in mediaMap, keyed by its path inside the archive and mapped to
// the destination path inside the bundle.
//
// An error is returned when the post has no media (its date and fallback
// caption come from the first media item), when the caption cannot be
// re-encoded, or when the bundle cannot be written.
func postToPost(p post, mediaMap map[string]string, outputDir string) error {
	if len(p.Media) == 0 {
		return errors.New("post has no media")
	}

	id, err := postHash(p)
	if err != nil {
		return err
	}

	postDir := path.Join(outputDir, id)
	if err := os.MkdirAll(postDir, 0750); err != nil {
		return err
	}

	// The caption is stored on the post, or on its first media item when the
	// post-level title is empty.
	text := p.Title
	if text == "" {
		text = p.Media[0].Title
	}
	if text, err = fixEncoding(text); err != nil {
		return err
	}

	fm := frontMatter{Title: postTitle(text)}

	text = escapeMarkdown(text)

	// Turn hashtags and mentions into links, collecting tags along the way.
	text = hashtag.ReplaceAllStringFunc(text, func(s string) string {
		name, known := tagMap[s]
		switch {
		case known:
			tag := strings.ToLower(name)
			fm.Tags = append(fm.Tags, tag)
			return fmt.Sprintf("[%s](/tags/%s)", name, tag)
		case strings.HasPrefix(s, "@"):
			// Handles missing from tagMap are printed to stdout and linked
			// to their Instagram profile.
			fmt.Println(s)
			return fmt.Sprintf("[%s](https://instagram.com/%s)", s, s[1:])
		default:
			fm.Tags = append(fm.Tags, s[1:])
			return fmt.Sprintf("[%s](/tags/%s)", s, s[1:])
		}
	})

	// Give each media file a predictable name inside the bundle.
	for i, m := range p.Media {
		ext := path.Ext(m.URI)
		if ext == "" {
			// Some video files have their extensions omitted
			ext = ".mp4"
		}
		hugoName := fmt.Sprintf("media-%d%s", i, ext)
		fm.Media = append(fm.Media, hugoName)
		mediaMap[m.URI] = path.Join(postDir, hugoName)
	}

	// The post is dated by its first media item.
	publishedAt := time.Unix(p.Media[0].CreationTimestamp, 0).UTC()
	fm.Date = publishedAt.Format(time.RFC3339)

	return writeHugoPost(path.Join(postDir, "index.md"), fm, text)
}

// postTitle derives a single-line title from a caption: its first line,
// shortened with a trailing "…" when that line is longer than titleLength
// bytes. The cut is made at the last space between minLength and titleLength,
// or at minLength when there is none.
func postTitle(caption string) string {
	title, _, _ := strings.Cut(caption, "\n")
	if len(title) <= titleLength {
		return title
	}

	cut := minLength
	for i := titleLength; i > minLength; i-- {
		if title[i] == ' ' {
			cut = i
			break
		}
	}
	// Cutting at a fixed byte offset can split a multi-byte character;
	// ToValidUTF8 drops such a dangling fragment so the title stays valid
	// UTF-8.
	return strings.ToValidUTF8(title[:cut], "") + "…"
}

// writeHugoPost creates filename holding fm as YAML front matter between
// "---" fences, followed by body. The file is always closed, and the first
// write or close error is returned.
func writeHugoPost(filename string, fm frontMatter, body string) (err error) {
	f, err := os.Create(filename)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	if _, err = fmt.Fprintln(f, "---"); err != nil {
		return err
	}
	if err = yaml.NewEncoder(f).Encode(fm); err != nil {
		return err
	}
	if _, err = fmt.Fprintln(f, "---"); err != nil {
		return err
	}
	_, err = fmt.Fprintln(f, body)
	return err
}
// fixEncoding encodes a string as ISO 8859-1, turning each character back
// into a single byte; it returns an error for input the encoder cannot
// represent. postToPost runs every caption through it before use.
// NOTE(review): presumably this undoes mojibake in the export, where UTF-8
// bytes were each stored as a separate character — confirm against real data.
var fixEncoding = charmap.ISO8859_1.NewEncoder().String
// markdownEscapable matches the characters that would otherwise start
// Markdown link or image syntax: "!", "[", "]", "(" and ")".
var markdownEscapable = regexp.MustCompile(`([!\[\]\(\)])`)

// escapeMarkdown prepares a plain-text caption for use as Markdown: every
// newline is doubled so that each line becomes its own paragraph, and every
// character matched by markdownEscapable is prefixed with a backslash.
func escapeMarkdown(str string) string {
	paragraphs := strings.Join(strings.Split(str, "\n"), "\n\n")
	return markdownEscapable.ReplaceAllStringFunc(paragraphs, func(ch string) string {
		return `\` + ch
	})
}
// postHash derives a stable identifier for p, used as the name of the post's
// directory: the MD5 digest of the post's JSON encoding, rendered as
// lowercase base32 without padding.
func postHash(p post) (string, error) {
	digest := md5.New()
	if err := json.NewEncoder(digest).Encode(p); err != nil {
		return "", err
	}

	unpadded := base32.StdEncoding.WithPadding(base32.NoPadding)
	id := unpadded.EncodeToString(digest.Sum(nil))
	return strings.ToLower(id), nil
}
// copyMedia extracts every archive entry that has a destination in mediaMap
// (archive path -> destination path) and writes it to that destination.
// Entries without a mapping are skipped. Destination directories must already
// exist. Copying stops at the first failure, which is returned with the
// source and destination paths attached.
func copyMedia(zf *zip.ReadCloser, mediaMap map[string]string) error {
	for _, f := range zf.File {
		dst, ok := mediaMap[f.Name]
		if !ok {
			continue
		}
		if err := copyZipEntry(f, dst); err != nil {
			return fmt.Errorf("copying %s to %s: %w", f.Name, dst, err)
		}
	}
	return nil
}

// copyZipEntry writes the content of one archive entry to dst. Both ends are
// closed before it returns, so open files do not pile up while a large
// archive is copied, and the first read, write or close error is reported.
func copyZipEntry(f *zip.File, dst string) (err error) {
	src, err := f.Open()
	if err != nil {
		return err
	}
	defer func() {
		if cerr := src.Close(); err == nil {
			err = cerr
		}
	}()

	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer func() {
		// A failed close can mean the data never reached the disk.
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(out, src)
	return err
}