From 31b4ffca8977224993a874463c33cf38a600e912 Mon Sep 17 00:00:00 2001
From: Jacky Zhao
Date: Mon, 27 Dec 2021 20:46:47 -0500
Subject: [PATCH] better char escaping

---
 parse.go |  6 ++++--
 util.go  | 40 ++++++++++++++++++++++++++++++++++++++++
 walk.go  |  2 +-
 3 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/parse.go b/parse.go
index 0583c6b..e85f778 100644
--- a/parse.go
+++ b/parse.go
@@ -40,10 +40,12 @@ func parse(dir, pathPrefix string) []Link {
 			target = strings.TrimSpace(target)
 			target = strings.Replace(target, " ", "-", -1)
 
+			source := filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md")))
+
 			fmt.Printf(" '%s' => %s\n", text, target)
 			links = append(links, Link{
-				Source: filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md"))),
-				Target: target,
+				Source: UnicodeSanitize(source),
+				Target: UnicodeSanitize(target),
 				Text:   text,
 			})
 			n++
diff --git a/util.go b/util.go
index ab93bd6..70e3716 100644
--- a/util.go
+++ b/util.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"path/filepath"
 	"strings"
+	"unicode"
 )
 
 func trim(source, prefix, suffix string) string {
@@ -28,6 +29,45 @@ func isInternal(link string) bool {
 	return !strings.HasPrefix(link, "http")
 }
 
+// ishex reports whether c is an ASCII hexadecimal digit. From https://golang.org/src/net/url/url.go
+func ishex(c rune) bool {
+	switch {
+	case '0' <= c && c <= '9':
+		return true
+	case 'a' <= c && c <= 'f':
+		return true
+	case 'A' <= c && c <= 'F':
+		return true
+	}
+	return false
+}
+
+// UnicodeSanitize sanitizes s for use in Hugo URLs: it keeps letters, digits, marks, the runes ./\_#+~ and %XX escapes, drops everything else, and emits a single '-' where '-' or whitespace separated two kept runes.
+// From https://github.com/gohugoio/hugo/blob/93aad3c543828efca2adeb7f96cf50ae29878593/helpers/path.go#L94
+func UnicodeSanitize(s string) string {
+	source := []rune(s)
+	target := make([]rune, 0, len(source))
+	var prependHyphen bool
+
+	for i, r := range source {
+		isAllowed := r == '.' || r == '/' || r == '\\' || r == '_' || r == '#' || r == '+' || r == '~'
+		isAllowed = isAllowed || unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r)
+		isAllowed = isAllowed || (r == '%' && i+2 < len(source) && ishex(source[i+1]) && ishex(source[i+2])) // '%' is kept only when two hex digits follow it
+
+		if isAllowed {
+			if prependHyphen {
+				target = append(target, '-')
+				prependHyphen = false
+			}
+			target = append(target, r)
+		} else if len(target) > 0 && (r == '-' || unicode.IsSpace(r)) { // hyphen is deferred until the next kept rune, so leading and trailing separators never reach the output
+			prependHyphen = true
+		}
+	}
+
+	return string(target)
+}
+
 // filter out certain links (e.g. to media)
 func filter(links []Link) (res []Link) {
 	for _, l := range links {
diff --git a/walk.go b/walk.go
index 2df700c..fba7523 100644
--- a/walk.go
+++ b/walk.go
@@ -47,7 +47,7 @@ func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res []
 			// check if page is private
 			if parsedPrivate, ok := frontmatter["draft"]; !ok || !parsedPrivate.(bool) {
 				info, _ := os.Stat(s)
-				adjustedPath := strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1)
+				adjustedPath := UnicodeSanitize(strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1))
 				i[adjustedPath] = Content{
 					LastModified: info.ModTime(),
 					Title:        title,