2021-12-27 16:19:05 -05:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"io/fs"
|
|
|
|
"io/ioutil"
|
2021-12-27 16:58:21 -05:00
|
|
|
"os"
|
2021-12-27 16:19:05 -05:00
|
|
|
"path/filepath"
|
|
|
|
"strings"
|
2022-04-02 11:53:14 -07:00
|
|
|
"time"
|
2022-05-05 00:55:59 -04:00
|
|
|
|
|
|
|
"github.com/adrg/frontmatter"
|
|
|
|
"gopkg.in/yaml.v2"
|
2021-12-27 16:19:05 -05:00
|
|
|
)
|
|
|
|
|
2022-05-05 00:55:59 -04:00
|
|
|
// Front holds the YAML front matter fields decoded from the head of a
// content file.
type Front struct {
	// Title is populated from the `title` key.
	Title string `yaml:"title"`
	// Draft is populated from the `draft` key.
	Draft bool `yaml:"draft"`
}
|
|
|
|
|
2021-12-27 16:19:05 -05:00
|
|
|
// recursively walk directory and return all files with given extension
|
2021-12-27 16:51:36 -05:00
|
|
|
func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res []Link, i ContentIndex) {
|
2021-12-27 16:19:05 -05:00
|
|
|
fmt.Printf("Scraping %s\n", root)
|
|
|
|
i = make(ContentIndex)
|
|
|
|
|
|
|
|
nPrivate := 0
|
|
|
|
|
2022-05-05 00:55:59 -04:00
|
|
|
formats := []*frontmatter.Format{
|
|
|
|
frontmatter.NewFormat("---", "---", yaml.Unmarshal),
|
|
|
|
}
|
|
|
|
|
2022-04-02 11:53:14 -07:00
|
|
|
start := time.Now()
|
|
|
|
|
2022-03-15 00:50:03 -07:00
|
|
|
err := filepath.WalkDir(root, func(fp string, d fs.DirEntry, e error) error {
|
2021-12-27 16:19:05 -05:00
|
|
|
if e != nil {
|
|
|
|
return e
|
|
|
|
}
|
2022-03-15 00:50:03 -07:00
|
|
|
|
|
|
|
// path normalize fp
|
|
|
|
s := filepath.ToSlash(fp)
|
2021-12-27 16:51:36 -05:00
|
|
|
if _, ignored := ignorePaths[s]; ignored {
|
|
|
|
fmt.Printf("[Ignored] %s\n", d.Name())
|
2021-12-27 16:58:21 -05:00
|
|
|
nPrivate++
|
2021-12-27 16:51:36 -05:00
|
|
|
} else if filepath.Ext(d.Name()) == ext {
|
2021-12-27 16:19:05 -05:00
|
|
|
if index {
|
|
|
|
text := getText(s)
|
|
|
|
|
2022-05-05 00:55:59 -04:00
|
|
|
var matter Front
|
|
|
|
raw_body, err := frontmatter.Parse(strings.NewReader(text), &matter, formats...)
|
|
|
|
body := string(raw_body)
|
2021-12-27 16:19:05 -05:00
|
|
|
if err != nil {
|
2022-05-05 00:55:59 -04:00
|
|
|
matter = Front{
|
|
|
|
Title: "Untitled Page",
|
|
|
|
Draft: false,
|
|
|
|
}
|
2021-12-27 16:19:05 -05:00
|
|
|
body = text
|
|
|
|
}
|
|
|
|
// check if page is private
|
2022-05-05 00:55:59 -04:00
|
|
|
if !matter.Draft {
|
2021-12-27 16:58:21 -05:00
|
|
|
info, _ := os.Stat(s)
|
2022-04-02 11:53:14 -07:00
|
|
|
source := processSource(trim(s, root, ".md"))
|
|
|
|
|
2023-02-25 13:00:05 -08:00
|
|
|
// default title
|
|
|
|
title := matter.Title
|
|
|
|
if title == "" {
|
|
|
|
fileName := d.Name()
|
|
|
|
title = strings.TrimSuffix(filepath.Base(fileName), filepath.Ext(fileName))
|
|
|
|
}
|
|
|
|
|
2022-07-31 09:46:39 -07:00
|
|
|
// add to content and link index
|
2022-04-02 11:53:14 -07:00
|
|
|
i[source] = Content{
|
2021-12-27 16:58:21 -05:00
|
|
|
LastModified: info.ModTime(),
|
2023-02-25 13:00:05 -08:00
|
|
|
Title: title,
|
2022-03-15 00:50:03 -07:00
|
|
|
Content: body,
|
2021-12-27 16:19:05 -05:00
|
|
|
}
|
2022-07-31 09:46:39 -07:00
|
|
|
res = append(res, parse(s, root)...)
|
2021-12-27 16:19:05 -05:00
|
|
|
} else {
|
2021-12-27 16:58:21 -05:00
|
|
|
fmt.Printf("[Ignored] %s\n", d.Name())
|
2021-12-27 16:19:05 -05:00
|
|
|
nPrivate++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2022-04-02 11:53:14 -07:00
|
|
|
|
|
|
|
end := time.Now()
|
|
|
|
|
|
|
|
fmt.Printf("[DONE] in %s\n", end.Sub(start).Round(time.Millisecond))
|
2021-12-27 16:19:05 -05:00
|
|
|
fmt.Printf("Ignored %d private files \n", nPrivate)
|
|
|
|
fmt.Printf("Parsed %d total links \n", len(res))
|
|
|
|
return res, i
|
|
|
|
}
|
|
|
|
|
|
|
|
// getText reads the whole file located at dir and returns its contents as a
// string. It panics when the file cannot be read.
func getText(dir string) string {
	contents, readErr := ioutil.ReadFile(dir)
	if readErr == nil {
		return string(contents)
	}
	panic(readErr)
}
|