From 93d9ffe129df2a6289a6b3b6fde7def435fab8d6 Mon Sep 17 00:00:00 2001 From: Jacky Zhao Date: Sat, 2 Apr 2022 11:53:14 -0700 Subject: [PATCH 1/2] print improvements, fix unicode char parsing --- parse.go | 19 +++++++------------ util.go | 13 +++++++++++-- walk.go | 13 +++++++++++-- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/parse.go b/parse.go index e85f778..65b1510 100644 --- a/parse.go +++ b/parse.go @@ -5,7 +5,6 @@ import ( "fmt" "github.com/PuerkitoBio/goquery" "io/ioutil" - "path/filepath" "strings" ) @@ -19,7 +18,7 @@ func parse(dir, pathPrefix string) []Link { // parse md var links []Link - fmt.Printf("[Parsing note] %s\n", trim(dir, pathPrefix, ".md")) + fmt.Printf("[Parsing note] %s => ", trim(dir, pathPrefix, ".md")) var buf bytes.Buffer if err := md.Convert(source, &buf); err != nil { @@ -35,22 +34,18 @@ func parse(dir, pathPrefix string) []Link { target = "#" } - target = strings.Replace(target, "%20", " ", -1) - target = strings.Split(processTarget(target), "#")[0] - target = strings.TrimSpace(target) - target = strings.Replace(target, " ", "-", -1) + target = processTarget(target) + source := processSource(trim(dir, pathPrefix, ".md")) - source := filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md"))) - - fmt.Printf(" '%s' => %s\n", text, target) + // fmt.Printf(" '%s' => %s\n", source, target) links = append(links, Link{ - Source: UnicodeSanitize(source), - Target: UnicodeSanitize(target), + Source: source, + Target: target, Text: text, }) n++ }) - fmt.Printf(" Found: %d links\n", n) + fmt.Printf("found: %d links\n", n) return links } diff --git a/util.go b/util.go index 70e3716..2dc42e1 100644 --- a/util.go +++ b/util.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "net/url" "path/filepath" "strings" "unicode" @@ -22,7 +23,16 @@ func processTarget(source string) string { if strings.HasPrefix(source, "/") { return strings.TrimSuffix(source, ".md") } - return "/" + strings.TrimSuffix(strings.TrimSuffix(source, ".html"), ".md") + res := "/" + strings.TrimSuffix(strings.TrimSuffix(source, ".html"), ".md") + res = strings.Split(res, "#")[0] + res = strings.TrimSpace(res) + return UnicodeSanitize(res) +} + +func processSource(source string) string { + res := filepath.ToSlash(hugoPathTrim(source)) + res = UnicodeSanitize(res) + return strings.ReplaceAll(url.PathEscape(res), "%2F", "/") } func isInternal(link string) bool { @@ -80,4 +90,3 @@ func filter(links []Link) (res []Link) { fmt.Printf("Removed %d external and non-markdown links\n", len(links)-len(res)) return res } - diff --git a/walk.go b/walk.go index f44e6a9..7833a92 100644 --- a/walk.go +++ b/walk.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "strings" + "time" ) // recursively walk directory and return all files with given extension @@ -19,6 +20,8 @@ func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res [] m.Handle("---", front.YAMLHandler) nPrivate := 0 + start := time.Now() + err := filepath.WalkDir(root, func(fp string, d fs.DirEntry, e error) error { if e != nil { return e @@ -50,8 +53,10 @@ func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res [] // check if page is private if parsedPrivate, ok := frontmatter["draft"]; !ok || !parsedPrivate.(bool) { info, _ := os.Stat(s) - adjustedPath := UnicodeSanitize(strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1)) - i[adjustedPath] = Content{ + source := processSource(trim(s, root, ".md")) + + // adjustedPath := UnicodeSanitize(strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1)) + i[source] = Content{ LastModified: info.ModTime(), Title: title, Content: body, @@ -67,6 +72,10 @@ func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res [] if err != nil { panic(err) } + + end := time.Now() + + fmt.Printf("[DONE] in %s\n", end.Sub(start).Round(time.Millisecond)) fmt.Printf("Ignored %d private files \n", nPrivate) fmt.Printf("Parsed %d total links \n", len(res)) return res, i From 605cd60c6f936381d25be0753ce0e3e690df7118 Mon Sep 17 00:00:00 2001 From: Jacky Zhao Date: Sat, 2 Apr 2022 12:51:56 -0700 Subject: [PATCH 2/2] more robust escaping for targets --- util.go | 1 + 1 file changed, 1 insertion(+) diff --git a/util.go b/util.go index 2dc42e1..3694820 100644 --- a/util.go +++ b/util.go @@ -24,6 +24,7 @@ func processTarget(source string) string { return strings.TrimSuffix(source, ".md") } res := "/" + strings.TrimSuffix(strings.TrimSuffix(source, ".html"), ".md") + res, _ = url.PathUnescape(res) res = strings.Split(res, "#")[0] res = strings.TrimSpace(res) return UnicodeSanitize(res)