2021-07-17 06:26:40 +03:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2021-07-17 07:51:06 +03:00
|
|
|
"flag"
|
2021-07-17 08:26:45 +03:00
|
|
|
"fmt"
|
2021-07-17 06:26:40 +03:00
|
|
|
md "github.com/nikitavoloboev/markdown-parser"
|
2021-07-17 07:38:12 +03:00
|
|
|
"gopkg.in/yaml.v3"
|
2021-07-17 06:26:40 +03:00
|
|
|
"io/fs"
|
|
|
|
"io/ioutil"
|
2021-07-17 07:51:06 +03:00
|
|
|
"path"
|
2021-07-17 06:26:40 +03:00
|
|
|
"path/filepath"
|
2021-08-25 20:05:12 +03:00
|
|
|
"regexp"
|
2021-07-17 06:32:47 +03:00
|
|
|
"strings"
|
2021-07-17 06:26:40 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
// Link describes one markdown link discovered while parsing a page.
//
// Field order is significant: it is the order in which the fields appear in
// the generated YAML.
type Link struct {
	// Source is the site-relative path of the page containing the link.
	Source string
	// Target is the destination of the link after normalisation.
	Target string
	// Text is the display text of the link.
	Text string
}
|
|
|
|
|
2021-07-17 07:01:08 +03:00
|
|
|
// LinkTable groups links under a page path: the key is a page path and the
// value is every Link recorded for that path.
type LinkTable = map[string][]Link
|
|
|
|
type Index struct {
|
|
|
|
Links LinkTable
|
|
|
|
Backlinks LinkTable
|
|
|
|
}
|
|
|
|
|
2021-08-25 20:05:12 +03:00
|
|
|
// Content is the indexed representation of a single page.
type Content struct {
	// Title is the page title, or a placeholder when none was found.
	Title string
	// Content is the raw text of the page file.
	Content string
}
|
|
|
|
|
|
|
|
// ContentIndex maps a page path to the Content recorded for that page.
type ContentIndex = map[string]Content
|
|
|
|
|
2021-07-17 06:32:47 +03:00
|
|
|
// trim removes suffix from the end of source and then prefix from the start
// of what remains. Either is left alone when it does not match.
func trim(source, prefix, suffix string) string {
	withoutSuffix := strings.TrimSuffix(source, suffix)
	return strings.TrimPrefix(withoutSuffix, prefix)
}
|
|
|
|
|
2021-07-17 18:39:53 +03:00
|
|
|
// hugoPathTrim strips a trailing "/index" and then a trailing "_index" from
// source, so index pages are addressed by their directory path.
//
// Note that only "_index" (without the preceding slash) is removed in the
// second step, so "/a/_index" becomes "/a/".
func hugoPathTrim(source string) string {
	withoutIndex := strings.TrimSuffix(source, "/index")
	return strings.TrimSuffix(withoutIndex, "_index")
}
|
|
|
|
|
2021-07-18 18:50:13 +03:00
|
|
|
func processTarget(source string) string {
|
2021-07-18 23:30:37 +03:00
|
|
|
if !isInternal(source) {
|
|
|
|
return source
|
|
|
|
}
|
2021-07-18 18:50:13 +03:00
|
|
|
if strings.HasPrefix(source, "/") {
|
|
|
|
return strings.TrimSuffix(source, ".md")
|
|
|
|
}
|
|
|
|
return "/" + strings.TrimSuffix(source, ".md")
|
|
|
|
}
|
|
|
|
|
2021-07-18 23:30:37 +03:00
|
|
|
// isInternal reports whether link points inside the site rather than at an
// external web address.
//
// A link is external only when it starts with the "http://" or "https://"
// scheme (compared case-insensitively). Matching the full scheme, instead of
// the bare "http" prefix, keeps internal pages whose names merely begin with
// "http" (for example "http-caching.md") classified as internal.
func isInternal(link string) bool {
	for _, scheme := range []string{"http://", "https://"} {
		if len(link) >= len(scheme) && strings.EqualFold(link[:len(scheme)], scheme) {
			return false
		}
	}
	return true
}
|
|
|
|
|
2021-07-17 06:32:47 +03:00
|
|
|
// parse single file for links
|
|
|
|
func parse(dir, pathPrefix string) []Link {
|
2021-07-17 06:26:40 +03:00
|
|
|
// read file
|
|
|
|
bytes, err := ioutil.ReadFile(dir)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// parse md
|
|
|
|
var links []Link
|
2021-07-17 08:26:45 +03:00
|
|
|
fmt.Printf("%s\n", trim(dir, pathPrefix, ".md"))
|
2021-07-17 06:26:40 +03:00
|
|
|
for text, target := range md.GetAllLinks(string(bytes)) {
|
2021-08-25 20:05:12 +03:00
|
|
|
target := strings.Split(processTarget(target), "#")[0]
|
|
|
|
fmt.Printf(" %s\n", target)
|
2021-07-17 06:26:40 +03:00
|
|
|
links = append(links, Link{
|
2021-10-24 00:18:14 +03:00
|
|
|
Source: filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md"))),
|
2021-08-25 20:05:12 +03:00
|
|
|
Target: target,
|
2021-07-17 06:26:40 +03:00
|
|
|
Text: text,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
return links
|
|
|
|
}
|
|
|
|
|
2021-08-25 20:05:12 +03:00
|
|
|
// getText returns the entire contents of the file at dir as a string.
// It panics if the file cannot be read.
func getText(dir string) string {
	contents, readErr := ioutil.ReadFile(dir)
	if readErr != nil {
		panic(readErr)
	}
	return string(contents)
}
|
|
|
|
|
2021-07-17 06:32:47 +03:00
|
|
|
// recursively walk directory and return all files with given extension
|
2021-08-25 20:05:12 +03:00
|
|
|
func walk(root, ext string, index bool) (res []Link, i ContentIndex) {
|
2021-07-17 07:51:06 +03:00
|
|
|
println(root)
|
2021-08-25 20:05:12 +03:00
|
|
|
i = make(ContentIndex)
|
|
|
|
titleRegex := regexp.MustCompile(`title: "(.*)"`)
|
|
|
|
|
2021-07-17 07:01:08 +03:00
|
|
|
err := filepath.WalkDir(root, func(s string, d fs.DirEntry, e error) error {
|
2021-07-17 06:26:40 +03:00
|
|
|
if e != nil {
|
|
|
|
return e
|
|
|
|
}
|
|
|
|
if filepath.Ext(d.Name()) == ext {
|
2021-07-17 07:01:08 +03:00
|
|
|
res = append(res, parse(s, root)...)
|
2021-08-25 20:05:12 +03:00
|
|
|
if index {
|
|
|
|
text := getText(s)
|
|
|
|
matches := titleRegex.FindStringSubmatch(text)
|
|
|
|
var title string
|
|
|
|
if len(matches) > 1 {
|
|
|
|
title = matches[1]
|
|
|
|
} else {
|
|
|
|
title = "Untitled Page"
|
|
|
|
}
|
|
|
|
|
|
|
|
adjustedPath := hugoPathTrim(trim(s, root, ".md"))
|
|
|
|
i[adjustedPath] = Content{
|
|
|
|
Title: title,
|
|
|
|
Content: text,
|
|
|
|
}
|
|
|
|
}
|
2021-07-17 06:26:40 +03:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
2021-07-17 07:01:08 +03:00
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2021-07-17 08:26:45 +03:00
|
|
|
fmt.Printf("parsed %d total links \n", len(res))
|
2021-08-25 20:05:12 +03:00
|
|
|
return res, i
|
2021-07-17 07:01:08 +03:00
|
|
|
}
|
|
|
|
|
2021-07-17 07:38:12 +03:00
|
|
|
// filter out certain links (e.g. to media)
|
|
|
|
func filter(links []Link) (res []Link) {
|
|
|
|
for _, l := range links {
|
|
|
|
// filter external and non-md
|
|
|
|
isMarkdown := filepath.Ext(l.Target) == "" || filepath.Ext(l.Target) == ".md"
|
2021-07-18 23:30:37 +03:00
|
|
|
if isInternal(l.Target) && isMarkdown {
|
2021-07-17 07:38:12 +03:00
|
|
|
res = append(res, l)
|
|
|
|
}
|
|
|
|
}
|
2021-07-17 08:26:45 +03:00
|
|
|
fmt.Printf("removed %d external and non-markdown links\n", len(links) - len(res))
|
2021-07-17 07:38:12 +03:00
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
2021-07-17 07:01:08 +03:00
|
|
|
// constructs index from links
|
|
|
|
func index(links []Link) (index Index) {
|
|
|
|
linkMap := make(map[string][]Link)
|
|
|
|
backlinkMap := make(map[string][]Link)
|
|
|
|
for _, l := range links {
|
2021-07-17 07:38:12 +03:00
|
|
|
// backlink (only if internal)
|
2021-07-17 18:39:53 +03:00
|
|
|
if _, ok := backlinkMap[l.Target]; ok {
|
2021-07-17 21:14:29 +03:00
|
|
|
backlinkMap[l.Target] = append(backlinkMap[l.Target], l)
|
2021-07-17 07:01:08 +03:00
|
|
|
} else {
|
2021-07-17 21:14:29 +03:00
|
|
|
backlinkMap[l.Target] = []Link{l}
|
2021-07-17 07:01:08 +03:00
|
|
|
}
|
|
|
|
|
2021-07-17 07:38:12 +03:00
|
|
|
// regular link
|
2021-07-17 21:14:29 +03:00
|
|
|
if _, ok := linkMap[l.Source]; ok {
|
|
|
|
linkMap[l.Source] = append(linkMap[l.Source], l)
|
2021-07-17 07:01:08 +03:00
|
|
|
} else {
|
2021-07-17 21:14:29 +03:00
|
|
|
linkMap[l.Source] = []Link{l}
|
2021-07-17 07:01:08 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
index.Links = linkMap
|
|
|
|
index.Backlinks = backlinkMap
|
|
|
|
return index
|
2021-07-17 06:26:40 +03:00
|
|
|
}
|
|
|
|
|
2021-07-18 18:01:42 +03:00
|
|
|
// message is the header written at the top of each generated YAML file, ahead
// of the marshalled data. Both lines start with "#", so YAML treats them as
// comments.
const message = "# THIS FILE WAS GENERATED using github.com/jackyzha0/hugo-obsidian\n# DO NOT EDIT\n"
|
2021-08-25 20:05:12 +03:00
|
|
|
func write(links []Link, contentIndex ContentIndex, toIndex bool, out string) error {
|
2021-07-17 07:51:06 +03:00
|
|
|
index := index(links)
|
|
|
|
resStruct := struct{
|
|
|
|
Index Index
|
|
|
|
Links []Link
|
|
|
|
}{
|
|
|
|
Index: index,
|
|
|
|
Links: links,
|
|
|
|
}
|
|
|
|
marshalledIndex, mErr := yaml.Marshal(&resStruct)
|
2021-07-17 07:38:12 +03:00
|
|
|
if mErr != nil {
|
|
|
|
return mErr
|
|
|
|
}
|
|
|
|
|
2021-07-18 18:01:42 +03:00
|
|
|
writeErr := ioutil.WriteFile(path.Join(out, "linkIndex.yaml"), append([]byte(message), marshalledIndex...), 0644)
|
2021-07-17 07:38:12 +03:00
|
|
|
if writeErr != nil {
|
|
|
|
return writeErr
|
|
|
|
}
|
2021-08-25 20:05:12 +03:00
|
|
|
|
|
|
|
if toIndex {
|
|
|
|
marshalledContentIndex, mcErr := yaml.Marshal(&contentIndex)
|
|
|
|
if mcErr != nil {
|
|
|
|
return mcErr
|
|
|
|
}
|
|
|
|
|
|
|
|
writeErr = ioutil.WriteFile(path.Join(out, "contentIndex.yaml"), append([]byte(message), marshalledContentIndex...), 0644)
|
|
|
|
if writeErr != nil {
|
|
|
|
return writeErr
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-17 07:38:12 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-07-17 06:26:40 +03:00
|
|
|
func main() {
|
2021-07-17 07:51:06 +03:00
|
|
|
in := flag.String("input", ".", "Input Directory")
|
|
|
|
out := flag.String("output", ".", "Output Directory")
|
2021-08-25 20:05:12 +03:00
|
|
|
index := flag.Bool("index", false, "Whether to index the content")
|
2021-07-17 07:51:06 +03:00
|
|
|
flag.Parse()
|
2021-08-25 20:05:12 +03:00
|
|
|
l, i := walk(*in, ".md", *index)
|
2021-07-17 07:38:12 +03:00
|
|
|
f := filter(l)
|
2021-08-25 20:05:12 +03:00
|
|
|
err := write(f, i, *index, *out)
|
2021-07-17 07:38:12 +03:00
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2021-07-17 06:26:40 +03:00
|
|
|
}
|