mirror of
https://github.com/jackyzha0/hugo-obsidian.git
synced 2024-06-14 11:42:35 +03:00
minor file refactoring
This commit is contained in:
parent
12376291c9
commit
3b5ed071b3
45
contentIndex.yaml
Normal file
45
contentIndex.yaml
Normal file
@ -0,0 +1,45 @@
|
||||
# THIS FILE WAS GENERATED USING github.com/jackyzha0/hugo-obsidian
|
||||
# DO NOT EDIT
|
||||
README:
|
||||
title: Untitled Page
|
||||
content: |
|
||||
# Obsidian Link Scraper
|
||||
Used by [Quartz](https://github.com/jackyzha0/quartz)
|
||||
|
||||
This repository comes to you in two parts.
|
||||
|
||||
1. GitHub Action (scrapes links into a `.yml` file)
|
||||
2. Hugo Partial (turns `.yml` file into graphs and tables)
|
||||
|
||||
## GitHub Action
|
||||
GitHub Action and binary that scrape an [Obsidian](http://obsidian.md/) vault for links and expose them as a `.yml` file for easy consumption by [Hugo](https://gohugo.io/).
|
||||
### Example Usage (Binary)
|
||||
Read Markdown from the `/content` folder and place the resulting `linkIndex.yaml` (and `contentIndex.yaml` if the `index` flag is enabled) into `/data`
|
||||
|
||||
```shell
|
||||
# Installation
|
||||
go install github.com/jackyzha0/hugo-obsidian
|
||||
|
||||
# Run
|
||||
hugo-obsidian -input=content -output=data -index=true
|
||||
```
|
||||
|
||||
### Example Usage (GitHub Action)
|
||||
|
||||
Add 'Build Link Index' as a build step in your workflow file (e.g. `.github/workflows/deploy.yaml`)
|
||||
```yaml
|
||||
...
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Build Link Index
|
||||
uses: jackyzha0/hugo-obsidian@v2.1
|
||||
with:
|
||||
input: content # input folder
|
||||
output: data # output folder
|
||||
index: true # whether to index content
|
||||
...
|
||||
```
|
1
go.mod
1
go.mod
@ -3,6 +3,7 @@ module github.com/jackyzha0/hugo-obsidian
|
||||
go 1.16
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v0.4.1 // indirect
|
||||
github.com/PuerkitoBio/goquery v1.8.0
|
||||
github.com/abhinav/goldmark-wikilink v0.3.0
|
||||
github.com/gernest/front v0.0.0-20210301115436-8a0b0a782d0a
|
||||
|
2
go.sum
2
go.sum
@ -1,3 +1,5 @@
|
||||
github.com/BurntSushi/toml v0.4.1 h1:GaI7EiDXDRfa8VshkTj7Fym7ha+y8/XxIgD2okUIjLw=
|
||||
github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
|
||||
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
|
||||
github.com/abhinav/goldmark-wikilink v0.3.0 h1:ry8CBaULn410PKCSkwLz/WVI2f/g7EB+yqY7LKHDcPQ=
|
||||
|
6
linkIndex.yaml
Normal file
6
linkIndex.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
# THIS FILE WAS GENERATED USING github.com/jackyzha0/hugo-obsidian
|
||||
# DO NOT EDIT
|
||||
index:
|
||||
links: {}
|
||||
backlinks: {}
|
||||
links: []
|
212
main.go
212
main.go
@ -1,24 +1,13 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/gernest/front"
|
||||
"gopkg.in/yaml.v3"
|
||||
"io/fs"
|
||||
"io/ioutil"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
wikilink "github.com/abhinav/goldmark-wikilink"
|
||||
"github.com/yuin/goldmark"
|
||||
)
|
||||
|
||||
|
||||
var md goldmark.Markdown
|
||||
|
||||
func init() {
|
||||
md = goldmark.New(
|
||||
goldmark.WithExtensions(&wikilink.Extender{}),
|
||||
@ -44,205 +33,12 @@ type Content struct {
|
||||
|
||||
type ContentIndex = map[string]Content
|
||||
|
||||
func trim(source, prefix, suffix string) string {
|
||||
return strings.TrimPrefix(strings.TrimSuffix(source, suffix), prefix)
|
||||
type IgnoredFiles struct {
|
||||
|
||||
}
|
||||
|
||||
// hugoPathTrim removes a trailing "/index" from source and then a trailing
// "_index" from whatever is left. Note that only the "_index" text itself is
// removed, so "/notes/_index" becomes "/notes/" (the slash is kept).
func hugoPathTrim(source string) string {
	trimmed := strings.TrimSuffix(source, "/index")
	trimmed = strings.TrimSuffix(trimmed, "_index")
	return trimmed
}
|
||||
func getIgnoredFiles() {
|
||||
|
||||
func processTarget(source string) string {
|
||||
if !isInternal(source) {
|
||||
return source
|
||||
}
|
||||
if strings.HasPrefix(source, "/") {
|
||||
return strings.TrimSuffix(source, ".md")
|
||||
}
|
||||
return "/" + strings.TrimSuffix(strings.TrimSuffix(source, ".html"), ".md")
|
||||
}
|
||||
|
||||
// isInternal reports whether link points inside the vault, i.e. it is not an
// absolute http:// or https:// URL.
//
// The scheme separator is part of the check on purpose: testing for the bare
// prefix "http" would misclassify relative targets whose file name merely
// starts with those letters (for example "http-caching.md") as external.
func isInternal(link string) bool {
	return !strings.HasPrefix(link, "http://") && !strings.HasPrefix(link, "https://")
}
|
||||
|
||||
// parse single file for links
|
||||
func parse(dir, pathPrefix string) []Link {
|
||||
// read file
|
||||
source, err := ioutil.ReadFile(dir)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// parse md
|
||||
var links []Link
|
||||
fmt.Printf("[Parsing note] %s\n", trim(dir, pathPrefix, ".md"))
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := md.Convert(source, &buf); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(&buf)
|
||||
var n int
|
||||
doc.Find("a").Each(func(i int, s *goquery.Selection) {
|
||||
text := strings.TrimSpace(s.Text())
|
||||
target, ok := s.Attr("href")
|
||||
if !ok {
|
||||
target = "#"
|
||||
}
|
||||
|
||||
target = strings.Replace(target, "%20", " ", -1)
|
||||
target = strings.Split(processTarget(target), "#")[0]
|
||||
target = strings.TrimSpace(target)
|
||||
target = strings.Replace(target, " ", "-", -1)
|
||||
|
||||
fmt.Printf(" '%s' => %s\n", text, target)
|
||||
links = append(links, Link{
|
||||
Source: filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md"))),
|
||||
Target: target,
|
||||
Text: text,
|
||||
})
|
||||
n++
|
||||
})
|
||||
fmt.Printf(":: %d links\n", n)
|
||||
|
||||
return links
|
||||
}
|
||||
|
||||
// getText returns the full contents of the file at dir as a string.
// It panics if the file cannot be read.
func getText(dir string) string {
	contents, readErr := ioutil.ReadFile(dir)
	if readErr == nil {
		return string(contents)
	}
	panic(readErr)
}
|
||||
|
||||
// recursively walk directory and return all files with given extension
|
||||
func walk(root, ext string, index bool) (res []Link, i ContentIndex) {
|
||||
println(root)
|
||||
i = make(ContentIndex)
|
||||
|
||||
m := front.NewMatter()
|
||||
m.Handle("---", front.YAMLHandler)
|
||||
nPrivate := 0
|
||||
|
||||
err := filepath.WalkDir(root, func(s string, d fs.DirEntry, e error) error {
|
||||
if e != nil {
|
||||
return e
|
||||
}
|
||||
if filepath.Ext(d.Name()) == ext {
|
||||
res = append(res, parse(s, root)...)
|
||||
if index {
|
||||
text := getText(s)
|
||||
|
||||
frontmatter, body, err := m.Parse(strings.NewReader(text))
|
||||
if err != nil {
|
||||
frontmatter = map[string]interface{}{}
|
||||
body = text
|
||||
}
|
||||
|
||||
var title string
|
||||
if parsedTitle, ok := frontmatter["title"]; ok {
|
||||
title = parsedTitle.(string)
|
||||
} else {
|
||||
title = "Untitled Page"
|
||||
}
|
||||
|
||||
// check if page is private
|
||||
if parsedPrivate, ok := frontmatter["draft"]; !ok || !parsedPrivate.(bool) {
|
||||
adjustedPath := strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1)
|
||||
i[adjustedPath] = Content{
|
||||
Title: title,
|
||||
Content: body,
|
||||
}
|
||||
} else {
|
||||
nPrivate++
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Printf("Ignored %d private files \n", nPrivate)
|
||||
fmt.Printf("Parsed %d total links \n", len(res))
|
||||
return res, i
|
||||
}
|
||||
|
||||
// filter out certain links (e.g. to media)
|
||||
func filter(links []Link) (res []Link) {
|
||||
for _, l := range links {
|
||||
// filter external and non-md
|
||||
isMarkdown := filepath.Ext(l.Target) == "" || filepath.Ext(l.Target) == ".md"
|
||||
if isInternal(l.Target) && isMarkdown {
|
||||
res = append(res, l)
|
||||
}
|
||||
}
|
||||
fmt.Printf("Removed %d external and non-markdown links\n", len(links) - len(res))
|
||||
return res
|
||||
}
|
||||
|
||||
// constructs index from links
|
||||
func index(links []Link) (index Index) {
|
||||
linkMap := make(map[string][]Link)
|
||||
backlinkMap := make(map[string][]Link)
|
||||
for _, l := range links {
|
||||
// backlink (only if internal)
|
||||
if _, ok := backlinkMap[l.Target]; ok {
|
||||
backlinkMap[l.Target] = append(backlinkMap[l.Target], l)
|
||||
} else {
|
||||
backlinkMap[l.Target] = []Link{l}
|
||||
}
|
||||
|
||||
// regular link
|
||||
if _, ok := linkMap[l.Source]; ok {
|
||||
linkMap[l.Source] = append(linkMap[l.Source], l)
|
||||
} else {
|
||||
linkMap[l.Source] = []Link{l}
|
||||
}
|
||||
}
|
||||
index.Links = linkMap
|
||||
index.Backlinks = backlinkMap
|
||||
return index
|
||||
}
|
||||
|
||||
const message = "# THIS FILE WAS GENERATED USING github.com/jackyzha0/hugo-obsidian\n# DO NOT EDIT\n"
|
||||
func write(links []Link, contentIndex ContentIndex, toIndex bool, out string) error {
|
||||
index := index(links)
|
||||
resStruct := struct{
|
||||
Index Index
|
||||
Links []Link
|
||||
}{
|
||||
Index: index,
|
||||
Links: links,
|
||||
}
|
||||
marshalledIndex, mErr := yaml.Marshal(&resStruct)
|
||||
if mErr != nil {
|
||||
return mErr
|
||||
}
|
||||
|
||||
writeErr := ioutil.WriteFile(path.Join(out, "linkIndex.yaml"), append([]byte(message), marshalledIndex...), 0644)
|
||||
if writeErr != nil {
|
||||
return writeErr
|
||||
}
|
||||
|
||||
if toIndex {
|
||||
marshalledContentIndex, mcErr := yaml.Marshal(&contentIndex)
|
||||
if mcErr != nil {
|
||||
return mcErr
|
||||
}
|
||||
|
||||
writeErr = ioutil.WriteFile(path.Join(out, "contentIndex.yaml"), append([]byte(message), marshalledContentIndex...), 0644)
|
||||
if writeErr != nil {
|
||||
return writeErr
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
|
54
parse.go
Normal file
54
parse.go
Normal file
@ -0,0 +1,54 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// parse single file for links
|
||||
func parse(dir, pathPrefix string) []Link {
|
||||
// read file
|
||||
source, err := ioutil.ReadFile(dir)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// parse md
|
||||
var links []Link
|
||||
fmt.Printf("[Parsing note] %s\n", trim(dir, pathPrefix, ".md"))
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := md.Convert(source, &buf); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(&buf)
|
||||
var n int
|
||||
doc.Find("a").Each(func(i int, s *goquery.Selection) {
|
||||
text := strings.TrimSpace(s.Text())
|
||||
target, ok := s.Attr("href")
|
||||
if !ok {
|
||||
target = "#"
|
||||
}
|
||||
|
||||
target = strings.Replace(target, "%20", " ", -1)
|
||||
target = strings.Split(processTarget(target), "#")[0]
|
||||
target = strings.TrimSpace(target)
|
||||
target = strings.Replace(target, " ", "-", -1)
|
||||
|
||||
fmt.Printf(" '%s' => %s\n", text, target)
|
||||
links = append(links, Link{
|
||||
Source: filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md"))),
|
||||
Target: target,
|
||||
Text: text,
|
||||
})
|
||||
n++
|
||||
})
|
||||
fmt.Printf(" Found: %d links\n", n)
|
||||
|
||||
return links
|
||||
}
|
43
util.go
Normal file
43
util.go
Normal file
@ -0,0 +1,43 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// trim removes suffix from the end of source and then prefix from the start
// of the result. An affix that is not present is simply left alone.
func trim(source, prefix, suffix string) string {
	withoutSuffix := strings.TrimSuffix(source, suffix)
	return strings.TrimPrefix(withoutSuffix, prefix)
}
|
||||
|
||||
// hugoPathTrim removes a trailing "/index" from source and then a trailing
// "_index" from whatever is left. Note that only the "_index" text itself is
// removed, so "/notes/_index" becomes "/notes/" (the slash is kept).
func hugoPathTrim(source string) string {
	trimmed := strings.TrimSuffix(source, "/index")
	trimmed = strings.TrimSuffix(trimmed, "_index")
	return trimmed
}
|
||||
|
||||
func processTarget(source string) string {
|
||||
if !isInternal(source) {
|
||||
return source
|
||||
}
|
||||
if strings.HasPrefix(source, "/") {
|
||||
return strings.TrimSuffix(source, ".md")
|
||||
}
|
||||
return "/" + strings.TrimSuffix(strings.TrimSuffix(source, ".html"), ".md")
|
||||
}
|
||||
|
||||
// isInternal reports whether link points inside the vault, i.e. it is not an
// absolute http:// or https:// URL.
//
// The scheme separator is part of the check on purpose: testing for the bare
// prefix "http" would misclassify relative targets whose file name merely
// starts with those letters (for example "http-caching.md") as external.
func isInternal(link string) bool {
	return !strings.HasPrefix(link, "http://") && !strings.HasPrefix(link, "https://")
}
|
||||
|
||||
// filter out certain links (e.g. to media)
|
||||
func filter(links []Link) (res []Link) {
|
||||
for _, l := range links {
|
||||
// filter external and non-md
|
||||
isMarkdown := filepath.Ext(l.Target) == "" || filepath.Ext(l.Target) == ".md"
|
||||
if isInternal(l.Target) && isMarkdown {
|
||||
res = append(res, l)
|
||||
}
|
||||
}
|
||||
fmt.Printf("Removed %d external and non-markdown links\n", len(links)-len(res))
|
||||
return res
|
||||
}
|
||||
|
74
walk.go
Normal file
74
walk.go
Normal file
@ -0,0 +1,74 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/gernest/front"
|
||||
"io/fs"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// recursively walk directory and return all files with given extension
|
||||
func walk(root, ext string, index bool) (res []Link, i ContentIndex) {
|
||||
fmt.Printf("Scraping %s\n", root)
|
||||
i = make(ContentIndex)
|
||||
|
||||
m := front.NewMatter()
|
||||
m.Handle("---", front.YAMLHandler)
|
||||
nPrivate := 0
|
||||
|
||||
err := filepath.WalkDir(root, func(s string, d fs.DirEntry, e error) error {
|
||||
if e != nil {
|
||||
return e
|
||||
}
|
||||
if filepath.Ext(d.Name()) == ext {
|
||||
res = append(res, parse(s, root)...)
|
||||
if index {
|
||||
text := getText(s)
|
||||
|
||||
frontmatter, body, err := m.Parse(strings.NewReader(text))
|
||||
if err != nil {
|
||||
frontmatter = map[string]interface{}{}
|
||||
body = text
|
||||
}
|
||||
|
||||
var title string
|
||||
if parsedTitle, ok := frontmatter["title"]; ok {
|
||||
title = parsedTitle.(string)
|
||||
} else {
|
||||
title = "Untitled Page"
|
||||
}
|
||||
|
||||
// check if page is private
|
||||
if parsedPrivate, ok := frontmatter["draft"]; !ok || !parsedPrivate.(bool) {
|
||||
adjustedPath := strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1)
|
||||
i[adjustedPath] = Content{
|
||||
Title: title,
|
||||
Content: body,
|
||||
}
|
||||
} else {
|
||||
nPrivate++
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Printf("Ignored %d private files \n", nPrivate)
|
||||
fmt.Printf("Parsed %d total links \n", len(res))
|
||||
return res, i
|
||||
}
|
||||
|
||||
// getText returns the full contents of the file at dir as a string.
// It panics if the file cannot be read.
func getText(dir string) string {
	contents, readErr := ioutil.ReadFile(dir)
	if readErr == nil {
		return string(contents)
	}
	panic(readErr)
}
|
||||
|
69
write.go
Normal file
69
write.go
Normal file
@ -0,0 +1,69 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"gopkg.in/yaml.v3"
|
||||
"io/ioutil"
|
||||
"path"
|
||||
)
|
||||
|
||||
const message = "# THIS FILE WAS GENERATED USING github.com/jackyzha0/hugo-obsidian\n# DO NOT EDIT\n"
|
||||
func write(links []Link, contentIndex ContentIndex, toIndex bool, out string) error {
|
||||
index := index(links)
|
||||
resStruct := struct {
|
||||
Index Index
|
||||
Links []Link
|
||||
}{
|
||||
Index: index,
|
||||
Links: links,
|
||||
}
|
||||
marshalledIndex, mErr := yaml.Marshal(&resStruct)
|
||||
if mErr != nil {
|
||||
return mErr
|
||||
}
|
||||
|
||||
writeErr := ioutil.WriteFile(path.Join(out, "linkIndex.yaml"), append([]byte(message), marshalledIndex...), 0644)
|
||||
if writeErr != nil {
|
||||
return writeErr
|
||||
}
|
||||
|
||||
if toIndex {
|
||||
marshalledContentIndex, mcErr := yaml.Marshal(&contentIndex)
|
||||
if mcErr != nil {
|
||||
return mcErr
|
||||
}
|
||||
|
||||
writeErr = ioutil.WriteFile(path.Join(out, "contentIndex.yaml"), append([]byte(message), marshalledContentIndex...), 0644)
|
||||
if writeErr != nil {
|
||||
return writeErr
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// constructs index from links
|
||||
func index(links []Link) (index Index) {
|
||||
linkMap := make(map[string][]Link)
|
||||
backlinkMap := make(map[string][]Link)
|
||||
for _, l := range links {
|
||||
// backlink (only if internal)
|
||||
if _, ok := backlinkMap[l.Target]; ok {
|
||||
backlinkMap[l.Target] = append(backlinkMap[l.Target], l)
|
||||
} else {
|
||||
backlinkMap[l.Target] = []Link{l}
|
||||
}
|
||||
|
||||
// regular link
|
||||
if _, ok := linkMap[l.Source]; ok {
|
||||
linkMap[l.Source] = append(linkMap[l.Source], l)
|
||||
} else {
|
||||
linkMap[l.Source] = []Link{l}
|
||||
}
|
||||
}
|
||||
index.Links = linkMap
|
||||
index.Backlinks = backlinkMap
|
||||
return index
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user