minor file refactoring

This commit is contained in:
Jacky Zhao 2021-12-27 16:19:05 -05:00
parent 12376291c9
commit 3b5ed071b3
9 changed files with 300 additions and 210 deletions

45
contentIndex.yaml Normal file
View File

@ -0,0 +1,45 @@
# THIS FILE WAS GENERATED USING github.com/jackyzha0/hugo-obsidian
# DO NOT EDIT
README:
title: Untitled Page
content: |
# Obsidian Link Scrapper
Used by [Quartz](https://github.com/jackyzha0/quartz)
This repository comes to you in two parts.
1. GitHub Action (scrapes links into a `.yml` file)
2. Hugo Partial (turns `.yml` file into graphs and tables)
## GitHub Action
GitHub action and binary to scrape [Obsidian](http://obsidian.md/) vault for links and exposes them as a `.yml` file for easy consumption by [Hugo](https://gohugo.io/).
### Example Usage (Binary)
Read Markdown from the `/content` folder and place the resulting `linkIndex.yaml` (and `contentIndex.yaml` if the `index` flag is enabled) into `/data`
```shell
# Installation
go install github.com/jackyzha0/hugo-obsidian
# Run
hugo-obsidian -input=content -output=data -index=true
```
### Example Usage (GitHub Action)
Add 'Build Link Index' as a build step in your workflow file (e.g. `.github/workflows/deploy.yaml`)
```yaml
...
jobs:
deploy:
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
- name: Build Link Index
uses: jackyzha0/hugo-obsidian@v2.1
with:
input: content # input folder
output: data # output folder
index: true # whether to index content
...
```

1
go.mod
View File

@ -3,6 +3,7 @@ module github.com/jackyzha0/hugo-obsidian
go 1.16 go 1.16
require ( require (
github.com/BurntSushi/toml v0.4.1 // indirect
github.com/PuerkitoBio/goquery v1.8.0 github.com/PuerkitoBio/goquery v1.8.0
github.com/abhinav/goldmark-wikilink v0.3.0 github.com/abhinav/goldmark-wikilink v0.3.0
github.com/gernest/front v0.0.0-20210301115436-8a0b0a782d0a github.com/gernest/front v0.0.0-20210301115436-8a0b0a782d0a

2
go.sum
View File

@ -1,3 +1,5 @@
github.com/BurntSushi/toml v0.4.1 h1:GaI7EiDXDRfa8VshkTj7Fym7ha+y8/XxIgD2okUIjLw=
github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/abhinav/goldmark-wikilink v0.3.0 h1:ry8CBaULn410PKCSkwLz/WVI2f/g7EB+yqY7LKHDcPQ= github.com/abhinav/goldmark-wikilink v0.3.0 h1:ry8CBaULn410PKCSkwLz/WVI2f/g7EB+yqY7LKHDcPQ=

6
linkIndex.yaml Normal file
View File

@ -0,0 +1,6 @@
# THIS FILE WAS GENERATED USING github.com/jackyzha0/hugo-obsidian
# DO NOT EDIT
index:
links: {}
backlinks: {}
links: []

216
main.go
View File

@ -1,24 +1,13 @@
package main package main
import ( import (
"bytes"
"flag" "flag"
"fmt"
"github.com/gernest/front"
"gopkg.in/yaml.v3"
"io/fs"
"io/ioutil"
"path"
"path/filepath"
"strings"
"github.com/PuerkitoBio/goquery"
wikilink "github.com/abhinav/goldmark-wikilink" wikilink "github.com/abhinav/goldmark-wikilink"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
) )
var md goldmark.Markdown var md goldmark.Markdown
func init() { func init() {
md = goldmark.New( md = goldmark.New(
goldmark.WithExtensions(&wikilink.Extender{}), goldmark.WithExtensions(&wikilink.Extender{}),
@ -33,216 +22,23 @@ type Link struct {
type LinkTable = map[string][]Link type LinkTable = map[string][]Link
type Index struct { type Index struct {
Links LinkTable Links LinkTable
Backlinks LinkTable Backlinks LinkTable
} }
type Content struct { type Content struct {
Title string Title string
Content string Content string
} }
type ContentIndex = map[string]Content type ContentIndex = map[string]Content
func trim(source, prefix, suffix string) string { type IgnoredFiles struct {
return strings.TrimPrefix(strings.TrimSuffix(source, suffix), prefix)
} }
func hugoPathTrim(source string) string { func getIgnoredFiles() {
return strings.TrimSuffix(strings.TrimSuffix(source, "/index"), "_index")
}
func processTarget(source string) string {
if !isInternal(source) {
return source
}
if strings.HasPrefix(source, "/") {
return strings.TrimSuffix(source, ".md")
}
return "/" + strings.TrimSuffix(strings.TrimSuffix(source, ".html"), ".md")
}
func isInternal(link string) bool {
return !strings.HasPrefix(link, "http")
}
// parse single file for links
func parse(dir, pathPrefix string) []Link {
// read file
source, err := ioutil.ReadFile(dir)
if err != nil {
panic(err)
}
// parse md
var links []Link
fmt.Printf("[Parsing note] %s\n", trim(dir, pathPrefix, ".md"))
var buf bytes.Buffer
if err := md.Convert(source, &buf); err != nil {
panic(err)
}
doc, err := goquery.NewDocumentFromReader(&buf)
var n int
doc.Find("a").Each(func(i int, s *goquery.Selection) {
text := strings.TrimSpace(s.Text())
target, ok := s.Attr("href")
if !ok {
target = "#"
}
target = strings.Replace(target, "%20", " ", -1)
target = strings.Split(processTarget(target), "#")[0]
target = strings.TrimSpace(target)
target = strings.Replace(target, " ", "-", -1)
fmt.Printf(" '%s' => %s\n", text, target)
links = append(links, Link{
Source: filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md"))),
Target: target,
Text: text,
})
n++
})
fmt.Printf(":: %d links\n", n)
return links
}
func getText(dir string) string {
// read file
bytes, err := ioutil.ReadFile(dir)
if err != nil {
panic(err)
}
return string(bytes)
}
// recursively walk directory and return all files with given extension
func walk(root, ext string, index bool) (res []Link, i ContentIndex) {
println(root)
i = make(ContentIndex)
m := front.NewMatter()
m.Handle("---", front.YAMLHandler)
nPrivate := 0
err := filepath.WalkDir(root, func(s string, d fs.DirEntry, e error) error {
if e != nil {
return e
}
if filepath.Ext(d.Name()) == ext {
res = append(res, parse(s, root)...)
if index {
text := getText(s)
frontmatter, body, err := m.Parse(strings.NewReader(text))
if err != nil {
frontmatter = map[string]interface{}{}
body = text
}
var title string
if parsedTitle, ok := frontmatter["title"]; ok {
title = parsedTitle.(string)
} else {
title = "Untitled Page"
}
// check if page is private
if parsedPrivate, ok := frontmatter["draft"]; !ok || !parsedPrivate.(bool) {
adjustedPath := strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1)
i[adjustedPath] = Content{
Title: title,
Content: body,
}
} else {
nPrivate++
}
}
}
return nil
})
if err != nil {
panic(err)
}
fmt.Printf("Ignored %d private files \n", nPrivate)
fmt.Printf("Parsed %d total links \n", len(res))
return res, i
}
// filter out certain links (e.g. to media)
func filter(links []Link) (res []Link) {
for _, l := range links {
// filter external and non-md
isMarkdown := filepath.Ext(l.Target) == "" || filepath.Ext(l.Target) == ".md"
if isInternal(l.Target) && isMarkdown {
res = append(res, l)
}
}
fmt.Printf("Removed %d external and non-markdown links\n", len(links) - len(res))
return res
}
// constructs index from links
func index(links []Link) (index Index) {
linkMap := make(map[string][]Link)
backlinkMap := make(map[string][]Link)
for _, l := range links {
// backlink (only if internal)
if _, ok := backlinkMap[l.Target]; ok {
backlinkMap[l.Target] = append(backlinkMap[l.Target], l)
} else {
backlinkMap[l.Target] = []Link{l}
}
// regular link
if _, ok := linkMap[l.Source]; ok {
linkMap[l.Source] = append(linkMap[l.Source], l)
} else {
linkMap[l.Source] = []Link{l}
}
}
index.Links = linkMap
index.Backlinks = backlinkMap
return index
}
const message = "# THIS FILE WAS GENERATED USING github.com/jackyzha0/hugo-obsidian\n# DO NOT EDIT\n"
func write(links []Link, contentIndex ContentIndex, toIndex bool, out string) error {
index := index(links)
resStruct := struct{
Index Index
Links []Link
}{
Index: index,
Links: links,
}
marshalledIndex, mErr := yaml.Marshal(&resStruct)
if mErr != nil {
return mErr
}
writeErr := ioutil.WriteFile(path.Join(out, "linkIndex.yaml"), append([]byte(message), marshalledIndex...), 0644)
if writeErr != nil {
return writeErr
}
if toIndex {
marshalledContentIndex, mcErr := yaml.Marshal(&contentIndex)
if mcErr != nil {
return mcErr
}
writeErr = ioutil.WriteFile(path.Join(out, "contentIndex.yaml"), append([]byte(message), marshalledContentIndex...), 0644)
if writeErr != nil {
return writeErr
}
}
return nil
} }
func main() { func main() {

54
parse.go Normal file
View File

@ -0,0 +1,54 @@
package main
import (
"bytes"
"fmt"
"github.com/PuerkitoBio/goquery"
"io/ioutil"
"path/filepath"
"strings"
)
// parse reads the markdown file at dir, renders it to HTML with the
// package-level goldmark instance md, and returns one Link for every anchor
// element in the rendered output.
//
// pathPrefix and the ".md" suffix are stripped from dir to build the Source of
// each returned Link. Every href is normalised before being stored as Target:
// "%20" becomes a space, the value is passed through processTarget, everything
// from the first "#" onwards is dropped, surrounding whitespace is trimmed and
// any remaining spaces are replaced with "-". An anchor without an href is
// treated as if its href were "#".
//
// parse panics if the file cannot be read, the markdown cannot be converted,
// or the rendered HTML cannot be parsed.
func parse(dir, pathPrefix string) []Link {
	// read file
	source, err := ioutil.ReadFile(dir)
	if err != nil {
		panic(err)
	}

	// parse md
	notePath := trim(dir, pathPrefix, ".md")
	fmt.Printf("[Parsing note] %s\n", notePath)

	var buf bytes.Buffer
	if err := md.Convert(source, &buf); err != nil {
		panic(err)
	}

	// A failed parse yields a nil document; fail loudly here rather than
	// crashing with a nil dereference inside Find below.
	doc, err := goquery.NewDocumentFromReader(&buf)
	if err != nil {
		panic(err)
	}

	// Source is identical for every link in this file, so compute it once.
	linkSource := filepath.ToSlash(hugoPathTrim(notePath))

	var links []Link
	doc.Find("a").Each(func(_ int, s *goquery.Selection) {
		text := strings.TrimSpace(s.Text())
		target, ok := s.Attr("href")
		if !ok {
			target = "#"
		}

		target = strings.ReplaceAll(target, "%20", " ")
		// keep only the page part of the target, dropping any "#fragment"
		target = strings.Split(processTarget(target), "#")[0]
		target = strings.TrimSpace(target)
		target = strings.ReplaceAll(target, " ", "-")

		fmt.Printf(" '%s' => %s\n", text, target)
		links = append(links, Link{
			Source: linkSource,
			Target: target,
			Text:   text,
		})
	})

	fmt.Printf(" Found: %d links\n", len(links))
	return links
}

43
util.go Normal file
View File

@ -0,0 +1,43 @@
package main
import (
"fmt"
"path/filepath"
"strings"
)
// trim returns source with suffix removed from its end and then prefix
// removed from its start. Either part is left untouched when it does not
// match.
func trim(source, prefix, suffix string) string {
	withoutSuffix := strings.TrimSuffix(source, suffix)
	return strings.TrimPrefix(withoutSuffix, prefix)
}
// hugoPathTrim removes a trailing "/index" from source and then a trailing
// "_index" from the result, so index pages are addressed by their directory
// path.
func hugoPathTrim(source string) string {
	trimmed := strings.TrimSuffix(source, "/index")
	trimmed = strings.TrimSuffix(trimmed, "_index")
	return trimmed
}
// processTarget normalises a link target.
//
// Targets that isInternal reports as external are returned unchanged. An
// internal target that already starts with "/" only loses a trailing ".md".
// Any other internal target loses a trailing ".html" and then a trailing
// ".md", and is prefixed with "/".
func processTarget(source string) string {
	switch {
	case !isInternal(source):
		return source
	case strings.HasPrefix(source, "/"):
		return strings.TrimSuffix(source, ".md")
	default:
		withoutHTML := strings.TrimSuffix(source, ".html")
		return "/" + strings.TrimSuffix(withoutHTML, ".md")
	}
}
// isInternal reports whether link points inside the vault, i.e. it is not an
// absolute "http://" or "https://" URL.
//
// The full scheme separator is matched on purpose: checking only for the
// "http" prefix would misclassify internal notes whose file name happens to
// start with those letters (for example "http-caching.md") as external.
func isInternal(link string) bool {
	return !strings.HasPrefix(link, "http://") && !strings.HasPrefix(link, "https://")
}
// filter returns the links whose Target is internal (per isInternal) and has
// either no file extension or the ".md" extension; everything else, such as
// external URLs and media files, is dropped. The number of dropped links is
// printed to stdout.
func filter(links []Link) (res []Link) {
	for _, candidate := range links {
		if !isInternal(candidate.Target) {
			continue
		}
		// an empty extension counts as markdown
		switch filepath.Ext(candidate.Target) {
		case "", ".md":
			res = append(res, candidate)
		}
	}
	removed := len(links) - len(res)
	fmt.Printf("Removed %d external and non-markdown links\n", removed)
	return res
}

74
walk.go Normal file
View File

@ -0,0 +1,74 @@
package main
import (
"fmt"
"github.com/gernest/front"
"io/fs"
"io/ioutil"
"path/filepath"
"strings"
)
// walk recursively visits every file below root whose extension equals ext
// and collects the links found in each of them via parse.
//
// When index is true, each visited file is additionally read and split into
// front matter and body, and its body is stored in the returned ContentIndex
// under the file's path relative to root (".md" and Hugo index suffixes
// removed, spaces replaced with "-"). Files whose front matter marks them as
// draft are left out of the ContentIndex and counted as private; their links
// are still returned. A file whose front matter cannot be parsed is indexed
// with its full text as body and the default title.
//
// res holds the links of all visited files; i is always non-nil and stays
// empty when index is false. walk panics if the directory walk reports an
// error.
func walk(root, ext string, index bool) (res []Link, i ContentIndex) {
	fmt.Printf("Scraping %s\n", root)
	i = make(ContentIndex)

	m := front.NewMatter()
	m.Handle("---", front.YAMLHandler)
	nPrivate := 0

	err := filepath.WalkDir(root, func(s string, d fs.DirEntry, e error) error {
		if e != nil {
			return e
		}
		// A directory can carry the extension too (e.g. "notes.md/"); reading
		// it as a file would panic, so only regular entries are parsed.
		if d.IsDir() || filepath.Ext(d.Name()) != ext {
			return nil
		}

		res = append(res, parse(s, root)...)
		if !index {
			return nil
		}

		text := getText(s)
		frontmatter, body, err := m.Parse(strings.NewReader(text))
		if err != nil {
			// no usable front matter: index the raw text as-is
			frontmatter = map[string]interface{}{}
			body = text
		}

		// Front matter values are untyped, so never assert blindly:
		// "title: 2021" decodes to a number, "title:" to nil.
		title := "Untitled Page"
		switch rawTitle := frontmatter["title"].(type) {
		case string:
			title = rawTitle
		case nil:
			// missing or empty title keeps the default
		default:
			title = fmt.Sprint(rawTitle)
		}

		// check if page is private; accept a quoted "true" as well so that a
		// draft is not published just because the flag was written as a string
		isDraft := false
		switch rawDraft := frontmatter["draft"].(type) {
		case bool:
			isDraft = rawDraft
		case string:
			isDraft = strings.EqualFold(strings.TrimSpace(rawDraft), "true")
		}
		if isDraft {
			nPrivate++
			return nil
		}

		adjustedPath := strings.ReplaceAll(hugoPathTrim(trim(s, root, ".md")), " ", "-")
		i[adjustedPath] = Content{
			Title:   title,
			Content: body,
		}
		return nil
	})
	if err != nil {
		panic(err)
	}

	fmt.Printf("Ignored %d private files \n", nPrivate)
	fmt.Printf("Parsed %d total links \n", len(res))
	return res, i
}
// getText returns the full contents of the file at dir as a string.
// It panics if the file cannot be read.
func getText(dir string) string {
	contents, readErr := ioutil.ReadFile(dir)
	if readErr == nil {
		return string(contents)
	}
	panic(readErr)
}

69
write.go Normal file
View File

@ -0,0 +1,69 @@
package main
import (
"gopkg.in/yaml.v3"
"io/ioutil"
"path"
)
// message is the header comment that write prepends to every YAML file it
// produces, marking the file as generated.
const message = "# THIS FILE WAS GENERATED USING github.com/jackyzha0/hugo-obsidian\n# DO NOT EDIT\n"
// write serialises the scraped data as YAML into the directory out.
//
// It always writes "linkIndex.yaml", containing the index built from links
// together with the flat list of links. When toIndex is true it also writes
// "contentIndex.yaml" from contentIndex. Both files start with message and are
// created with mode 0644. The first marshalling or write error is returned;
// files written before the failure are left in place.
func write(links []Link, contentIndex ContentIndex, toIndex bool, out string) error {
	// emit marshals payload to YAML and stores it, prefixed with message, as
	// the file called name inside out.
	emit := func(name string, payload interface{}) error {
		encoded, err := yaml.Marshal(payload)
		if err != nil {
			return err
		}
		contents := append([]byte(message), encoded...)
		return ioutil.WriteFile(path.Join(out, name), contents, 0644)
	}

	linkData := struct {
		Index Index
		Links []Link
	}{
		Index: index(links),
		Links: links,
	}
	if err := emit("linkIndex.yaml", &linkData); err != nil {
		return err
	}

	if !toIndex {
		return nil
	}
	return emit("contentIndex.yaml", &contentIndex)
}
// index groups links in both directions: Links maps each Source to the links
// leaving it, and Backlinks maps each Target to the links pointing at it.
// Links keep their input order within every group. No filtering happens
// here; every link passed in appears once in each table.
func index(links []Link) (index Index) {
	outgoing := make(map[string][]Link)
	incoming := make(map[string][]Link)

	for _, l := range links {
		// append on a missing key starts from a nil slice, so no existence
		// check is needed
		incoming[l.Target] = append(incoming[l.Target], l)
		outgoing[l.Source] = append(outgoing[l.Source], l)
	}

	return Index{
		Links:     outgoing,
		Backlinks: incoming,
	}
}