print improvements, fix unicode char parsing

This commit is contained in:
Jacky Zhao 2022-04-02 11:53:14 -07:00
parent 6caa733a1f
commit 93d9ffe129
3 changed files with 29 additions and 16 deletions

View File

@ -5,7 +5,6 @@ import (
"fmt" "fmt"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"io/ioutil" "io/ioutil"
"path/filepath"
"strings" "strings"
) )
@ -19,7 +18,7 @@ func parse(dir, pathPrefix string) []Link {
// parse md // parse md
var links []Link var links []Link
fmt.Printf("[Parsing note] %s\n", trim(dir, pathPrefix, ".md")) fmt.Printf("[Parsing note] %s => ", trim(dir, pathPrefix, ".md"))
var buf bytes.Buffer var buf bytes.Buffer
if err := md.Convert(source, &buf); err != nil { if err := md.Convert(source, &buf); err != nil {
@ -35,22 +34,18 @@ func parse(dir, pathPrefix string) []Link {
target = "#" target = "#"
} }
target = strings.Replace(target, "%20", " ", -1) target = processTarget(target)
target = strings.Split(processTarget(target), "#")[0] source := processSource(trim(dir, pathPrefix, ".md"))
target = strings.TrimSpace(target)
target = strings.Replace(target, " ", "-", -1)
source := filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md"))) // fmt.Printf(" '%s' => %s\n", source, target)
fmt.Printf(" '%s' => %s\n", text, target)
links = append(links, Link{ links = append(links, Link{
Source: UnicodeSanitize(source), Source: source,
Target: UnicodeSanitize(target), Target: target,
Text: text, Text: text,
}) })
n++ n++
}) })
fmt.Printf(" Found: %d links\n", n) fmt.Printf("found: %d links\n", n)
return links return links
} }

13
util.go
View File

@ -2,6 +2,7 @@ package main
import ( import (
"fmt" "fmt"
"net/url"
"path/filepath" "path/filepath"
"strings" "strings"
"unicode" "unicode"
@ -22,7 +23,16 @@ func processTarget(source string) string {
if strings.HasPrefix(source, "/") { if strings.HasPrefix(source, "/") {
return strings.TrimSuffix(source, ".md") return strings.TrimSuffix(source, ".md")
} }
return "/" + strings.TrimSuffix(strings.TrimSuffix(source, ".html"), ".md") res := "/" + strings.TrimSuffix(strings.TrimSuffix(source, ".html"), ".md")
res = strings.Split(res, "#")[0]
res = strings.TrimSpace(res)
return UnicodeSanitize(res)
}
// processSource normalizes a note path into the string used as a link source
// and as an index key. It passes the path through hugoPathTrim, converts OS
// path separators to forward slashes, applies UnicodeSanitize, and finally
// percent-encodes the result while keeping "/" as a literal separator.
func processSource(source string) string {
res := filepath.ToSlash(hugoPathTrim(source))
res = UnicodeSanitize(res)
// url.PathEscape also encodes "/" as "%2F"; undo that so path separators survive.
return strings.ReplaceAll(url.PathEscape(res), "%2F", "/")
} }
func isInternal(link string) bool { func isInternal(link string) bool {
@ -80,4 +90,3 @@ func filter(links []Link) (res []Link) {
fmt.Printf("Removed %d external and non-markdown links\n", len(links)-len(res)) fmt.Printf("Removed %d external and non-markdown links\n", len(links)-len(res))
return res return res
} }

13
walk.go
View File

@ -8,6 +8,7 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
"time"
) )
// recursively walk directory and return all files with given extension // recursively walk directory and return all files with given extension
@ -19,6 +20,8 @@ func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res []
m.Handle("---", front.YAMLHandler) m.Handle("---", front.YAMLHandler)
nPrivate := 0 nPrivate := 0
start := time.Now()
err := filepath.WalkDir(root, func(fp string, d fs.DirEntry, e error) error { err := filepath.WalkDir(root, func(fp string, d fs.DirEntry, e error) error {
if e != nil { if e != nil {
return e return e
@ -50,8 +53,10 @@ func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res []
// check if page is private // check if page is private
if parsedPrivate, ok := frontmatter["draft"]; !ok || !parsedPrivate.(bool) { if parsedPrivate, ok := frontmatter["draft"]; !ok || !parsedPrivate.(bool) {
info, _ := os.Stat(s) info, _ := os.Stat(s)
adjustedPath := UnicodeSanitize(strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1)) source := processSource(trim(s, root, ".md"))
i[adjustedPath] = Content{
// adjustedPath := UnicodeSanitize(strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1))
i[source] = Content{
LastModified: info.ModTime(), LastModified: info.ModTime(),
Title: title, Title: title,
Content: body, Content: body,
@ -67,6 +72,10 @@ func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res []
if err != nil { if err != nil {
panic(err) panic(err)
} }
end := time.Now()
fmt.Printf("[DONE] in %s\n", end.Sub(start).Round(time.Millisecond))
fmt.Printf("Ignored %d private files \n", nPrivate) fmt.Printf("Ignored %d private files \n", nPrivate)
fmt.Printf("Parsed %d total links \n", len(res)) fmt.Printf("Parsed %d total links \n", len(res))
return res, i return res, i