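// hugo-obsidian/main.go
//
// Captured from a repository web view (260 lines, 5.7 KiB, Go). The bare
// timestamp lines scattered through this listing appear to be per-line
// history dates from that view; they are not part of the program.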

2021-07-17 06:26:40 +03:00
package main
import (
"bytes"
2021-07-17 07:51:06 +03:00
"flag"
2021-07-17 08:26:45 +03:00
"fmt"
2021-10-25 08:50:53 +03:00
"github.com/gernest/front"
2021-07-17 07:38:12 +03:00
"gopkg.in/yaml.v3"
2021-07-17 06:26:40 +03:00
"io/fs"
"io/ioutil"
2021-07-17 07:51:06 +03:00
"path"
2021-07-17 06:26:40 +03:00
"path/filepath"
2021-07-17 06:32:47 +03:00
"strings"
"github.com/PuerkitoBio/goquery"
wikilink "github.com/abhinav/goldmark-wikilink"
"github.com/yuin/goldmark"
2021-07-17 06:26:40 +03:00
)
// md is the package-wide Markdown converter. It is assigned once in init and
// used by parse to render notes to HTML.
var md goldmark.Markdown

// init builds the shared goldmark converter with the wikilink extension
// registered.
func init() {
	extender := &wikilink.Extender{}
	md = goldmark.New(goldmark.WithExtensions(extender))
}
2021-07-17 06:26:40 +03:00
// Link is a single hyperlink discovered in a note by parse.
type Link struct {
// Source is the path of the note that contains the link, relative to the
// walked root and without the ".md" suffix.
Source string
// Target is the normalised destination of the link.
Target string
// Text is the anchor text with surrounding whitespace trimmed.
Text string
}
2021-07-17 07:01:08 +03:00
// LinkTable maps a note path to the links associated with that path.
type LinkTable = map[string][]Link
// Index holds the link graph in both directions.
type Index struct {
// Links groups every link under its Source.
Links LinkTable
// Backlinks groups every link under its Target.
Backlinks LinkTable
}
2021-08-25 20:05:12 +03:00
// Content is the indexed form of one note.
type Content struct {
// Title is the note's front-matter title; walk falls back to
// "Untitled Page" when no title is present.
Title string
// Content is the note body as returned by the front-matter parser, or the
// whole file text when the front matter could not be parsed.
Content string
}
// ContentIndex maps a note's adjusted path (computed in walk) to its Content.
type ContentIndex = map[string]Content
2021-07-17 06:32:47 +03:00
// trim removes suffix from the end of source and then prefix from the start
// of what remains. Either removal is skipped when source does not carry the
// corresponding affix.
func trim(source, prefix, suffix string) string {
	withoutSuffix := strings.TrimSuffix(source, suffix)
	return strings.TrimPrefix(withoutSuffix, prefix)
}
2021-07-17 18:39:53 +03:00
// hugoPathTrim strips a trailing "/index" from source and then a trailing
// "_index" from the result, so index pages are addressed by their directory
// path. Note that the "_index" case leaves the preceding slash in place.
func hugoPathTrim(source string) string {
	withoutLeafIndex := strings.TrimSuffix(source, "/index")
	return strings.TrimSuffix(withoutLeafIndex, "_index")
}
2021-07-18 18:50:13 +03:00
func processTarget(source string) string {
2021-07-18 23:30:37 +03:00
if !isInternal(source) {
return source
}
2021-07-18 18:50:13 +03:00
if strings.HasPrefix(source, "/") {
return strings.TrimSuffix(source, ".md")
}
return "/" + strings.TrimSuffix(strings.TrimSuffix(source, ".html"), ".md")
2021-07-18 18:50:13 +03:00
}
2021-07-18 23:30:37 +03:00
// isInternal reports whether link points at another note rather than at an
// external web page.
//
// Only targets that start with an explicit "http://" or "https://" scheme are
// treated as external. Checking for a bare "http" prefix would also match
// notes whose own name starts with those letters (for example
// "http-caching.md"), and such links would then be discarded as external.
func isInternal(link string) bool {
	return !strings.HasPrefix(link, "http://") && !strings.HasPrefix(link, "https://")
}
2021-07-17 06:32:47 +03:00
// parse single file for links
func parse(dir, pathPrefix string) []Link {
2021-07-17 06:26:40 +03:00
// read file
source, err := ioutil.ReadFile(dir)
2021-07-17 06:26:40 +03:00
if err != nil {
panic(err)
}
// parse md
var links []Link
fmt.Printf("[Parsing note] %s\n", trim(dir, pathPrefix, ".md"))
var buf bytes.Buffer
if err := md.Convert(source, &buf); err != nil {
panic(err)
}
doc, err := goquery.NewDocumentFromReader(&buf)
var n int
doc.Find("a").Each(func(i int, s *goquery.Selection) {
text := strings.TrimSpace(s.Text())
target, ok := s.Attr("href")
if !ok {
target = "#"
}
target = strings.Replace(target, "%20", " ", -1)
target = strings.Split(processTarget(target), "#")[0]
target = strings.TrimSpace(target)
target = strings.Replace(target, " ", "-", -1)
fmt.Printf(" '%s' => %s\n", text, target)
2021-07-17 06:26:40 +03:00
links = append(links, Link{
Source: filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md"))),
2021-08-25 20:05:12 +03:00
Target: target,
2021-07-17 06:26:40 +03:00
Text: text,
})
n++
})
fmt.Printf(":: %d links\n", n)
2021-07-17 06:26:40 +03:00
return links
}
2021-08-25 20:05:12 +03:00
// getText returns the entire contents of the file at dir as a string.
// It panics if the file cannot be read.
func getText(dir string) string {
	contents, readErr := ioutil.ReadFile(dir)
	if readErr != nil {
		panic(readErr)
	}
	return string(contents)
}
2021-07-17 06:32:47 +03:00
// recursively walk directory and return all files with given extension
2021-08-25 20:05:12 +03:00
func walk(root, ext string, index bool) (res []Link, i ContentIndex) {
2021-07-17 07:51:06 +03:00
println(root)
2021-08-25 20:05:12 +03:00
i = make(ContentIndex)
2021-10-25 08:50:53 +03:00
m := front.NewMatter()
m.Handle("---", front.YAMLHandler)
nPrivate := 0
2021-08-25 20:05:12 +03:00
2021-07-17 07:01:08 +03:00
err := filepath.WalkDir(root, func(s string, d fs.DirEntry, e error) error {
2021-07-17 06:26:40 +03:00
if e != nil {
return e
}
if filepath.Ext(d.Name()) == ext {
2021-07-17 07:01:08 +03:00
res = append(res, parse(s, root)...)
2021-08-25 20:05:12 +03:00
if index {
text := getText(s)
2021-10-25 08:50:53 +03:00
frontmatter, body, err := m.Parse(strings.NewReader(text))
if err != nil {
2021-10-25 08:56:24 +03:00
frontmatter = map[string]interface{}{}
body = text
2021-10-25 08:50:53 +03:00
}
2021-08-25 20:05:12 +03:00
var title string
2021-10-25 08:50:53 +03:00
if parsedTitle, ok := frontmatter["title"]; ok {
title = parsedTitle.(string)
2021-08-25 20:05:12 +03:00
} else {
title = "Untitled Page"
}
2021-10-25 08:50:53 +03:00
// check if page is private
if parsedPrivate, ok := frontmatter["draft"]; !ok || !parsedPrivate.(bool) {
adjustedPath := strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1)
2021-10-25 08:50:53 +03:00
i[adjustedPath] = Content{
Title: title,
Content: body,
}
} else {
nPrivate++
2021-08-25 20:05:12 +03:00
}
}
2021-07-17 06:26:40 +03:00
}
return nil
})
2021-07-17 07:01:08 +03:00
if err != nil {
panic(err)
}
2021-10-25 08:50:53 +03:00
fmt.Printf("Ignored %d private files \n", nPrivate)
fmt.Printf("Parsed %d total links \n", len(res))
2021-08-25 20:05:12 +03:00
return res, i
2021-07-17 07:01:08 +03:00
}
2021-07-17 07:38:12 +03:00
// filter out certain links (e.g. to media)
func filter(links []Link) (res []Link) {
for _, l := range links {
// filter external and non-md
isMarkdown := filepath.Ext(l.Target) == "" || filepath.Ext(l.Target) == ".md"
2021-07-18 23:30:37 +03:00
if isInternal(l.Target) && isMarkdown {
2021-07-17 07:38:12 +03:00
res = append(res, l)
}
}
2021-10-25 08:50:53 +03:00
fmt.Printf("Removed %d external and non-markdown links\n", len(links) - len(res))
2021-07-17 07:38:12 +03:00
return res
}
2021-07-17 07:01:08 +03:00
// constructs index from links
func index(links []Link) (index Index) {
linkMap := make(map[string][]Link)
backlinkMap := make(map[string][]Link)
for _, l := range links {
2021-07-17 07:38:12 +03:00
// backlink (only if internal)
2021-07-17 18:39:53 +03:00
if _, ok := backlinkMap[l.Target]; ok {
2021-07-17 21:14:29 +03:00
backlinkMap[l.Target] = append(backlinkMap[l.Target], l)
2021-07-17 07:01:08 +03:00
} else {
2021-07-17 21:14:29 +03:00
backlinkMap[l.Target] = []Link{l}
2021-07-17 07:01:08 +03:00
}
2021-07-17 07:38:12 +03:00
// regular link
2021-07-17 21:14:29 +03:00
if _, ok := linkMap[l.Source]; ok {
linkMap[l.Source] = append(linkMap[l.Source], l)
2021-07-17 07:01:08 +03:00
} else {
2021-07-17 21:14:29 +03:00
linkMap[l.Source] = []Link{l}
2021-07-17 07:01:08 +03:00
}
}
index.Links = linkMap
index.Backlinks = backlinkMap
return index
2021-07-17 06:26:40 +03:00
}
2021-10-25 08:50:53 +03:00
// message is the banner that write places at the top of every generated YAML
// file, ahead of the marshalled data.
const message = "# THIS FILE WAS GENERATED USING github.com/jackyzha0/hugo-obsidian\n# DO NOT EDIT\n"
2021-08-25 20:05:12 +03:00
func write(links []Link, contentIndex ContentIndex, toIndex bool, out string) error {
2021-07-17 07:51:06 +03:00
index := index(links)
resStruct := struct{
Index Index
Links []Link
}{
Index: index,
Links: links,
}
marshalledIndex, mErr := yaml.Marshal(&resStruct)
2021-07-17 07:38:12 +03:00
if mErr != nil {
return mErr
}
2021-07-18 18:01:42 +03:00
writeErr := ioutil.WriteFile(path.Join(out, "linkIndex.yaml"), append([]byte(message), marshalledIndex...), 0644)
2021-07-17 07:38:12 +03:00
if writeErr != nil {
return writeErr
}
2021-08-25 20:05:12 +03:00
if toIndex {
marshalledContentIndex, mcErr := yaml.Marshal(&contentIndex)
if mcErr != nil {
return mcErr
}
writeErr = ioutil.WriteFile(path.Join(out, "contentIndex.yaml"), append([]byte(message), marshalledContentIndex...), 0644)
if writeErr != nil {
return writeErr
}
}
2021-07-17 07:38:12 +03:00
return nil
}
2021-07-17 06:26:40 +03:00
func main() {
2021-07-17 07:51:06 +03:00
in := flag.String("input", ".", "Input Directory")
out := flag.String("output", ".", "Output Directory")
2021-08-25 20:05:12 +03:00
index := flag.Bool("index", false, "Whether to index the content")
2021-07-17 07:51:06 +03:00
flag.Parse()
2021-08-25 20:05:12 +03:00
l, i := walk(*in, ".md", *index)
2021-07-17 07:38:12 +03:00
f := filter(l)
2021-08-25 20:05:12 +03:00
err := write(f, i, *index, *out)
2021-07-17 07:38:12 +03:00
if err != nil {
panic(err)
}
2021-07-17 06:26:40 +03:00
}