print improvements, fix unicode char parsing

This commit is contained in:
Jacky Zhao 2022-04-02 11:53:14 -07:00
parent 6caa733a1f
commit 93d9ffe129
3 changed files with 29 additions and 16 deletions

View File

@ -5,7 +5,6 @@ import (
"fmt"
"github.com/PuerkitoBio/goquery"
"io/ioutil"
"path/filepath"
"strings"
)
@ -19,7 +18,7 @@ func parse(dir, pathPrefix string) []Link {
// parse md
var links []Link
fmt.Printf("[Parsing note] %s\n", trim(dir, pathPrefix, ".md"))
fmt.Printf("[Parsing note] %s => ", trim(dir, pathPrefix, ".md"))
var buf bytes.Buffer
if err := md.Convert(source, &buf); err != nil {
@ -35,22 +34,18 @@ func parse(dir, pathPrefix string) []Link {
target = "#"
}
target = strings.Replace(target, "%20", " ", -1)
target = strings.Split(processTarget(target), "#")[0]
target = strings.TrimSpace(target)
target = strings.Replace(target, " ", "-", -1)
target = processTarget(target)
source := processSource(trim(dir, pathPrefix, ".md"))
source := filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md")))
fmt.Printf(" '%s' => %s\n", text, target)
// fmt.Printf(" '%s' => %s\n", source, target)
links = append(links, Link{
Source: UnicodeSanitize(source),
Target: UnicodeSanitize(target),
Source: source,
Target: target,
Text: text,
})
n++
})
fmt.Printf(" Found: %d links\n", n)
fmt.Printf("found: %d links\n", n)
return links
}

13
util.go
View File

@ -2,6 +2,7 @@ package main
import (
"fmt"
"net/url"
"path/filepath"
"strings"
"unicode"
@ -22,7 +23,16 @@ func processTarget(source string) string {
if strings.HasPrefix(source, "/") {
return strings.TrimSuffix(source, ".md")
}
return "/" + strings.TrimSuffix(strings.TrimSuffix(source, ".html"), ".md")
res := "/" + strings.TrimSuffix(strings.TrimSuffix(source, ".html"), ".md")
res = strings.Split(res, "#")[0]
res = strings.TrimSpace(res)
return UnicodeSanitize(res)
}
func processSource(source string) string {
res := filepath.ToSlash(hugoPathTrim(source))
res = UnicodeSanitize(res)
return strings.ReplaceAll(url.PathEscape(res), "%2F", "/")
}
func isInternal(link string) bool {
@ -80,4 +90,3 @@ func filter(links []Link) (res []Link) {
fmt.Printf("Removed %d external and non-markdown links\n", len(links)-len(res))
return res
}

13
walk.go
View File

@ -8,6 +8,7 @@ import (
"os"
"path/filepath"
"strings"
"time"
)
// recursively walk directory and return all files with given extension
@ -19,6 +20,8 @@ func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res []
m.Handle("---", front.YAMLHandler)
nPrivate := 0
start := time.Now()
err := filepath.WalkDir(root, func(fp string, d fs.DirEntry, e error) error {
if e != nil {
return e
@ -50,8 +53,10 @@ func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res []
// check if page is private
if parsedPrivate, ok := frontmatter["draft"]; !ok || !parsedPrivate.(bool) {
info, _ := os.Stat(s)
adjustedPath := UnicodeSanitize(strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1))
i[adjustedPath] = Content{
source := processSource(trim(s, root, ".md"))
// adjustedPath := UnicodeSanitize(strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1))
i[source] = Content{
LastModified: info.ModTime(),
Title: title,
Content: body,
@ -67,6 +72,10 @@ func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res []
if err != nil {
panic(err)
}
end := time.Now()
fmt.Printf("[DONE] in %s\n", end.Sub(start).Round(time.Millisecond))
fmt.Printf("Ignored %d private files \n", nPrivate)
fmt.Printf("Parsed %d total links \n", len(res))
return res, i