better char escaping

This commit is contained in:
Jacky Zhao 2021-12-27 20:46:47 -05:00
parent 9a48624656
commit 31b4ffca89
3 changed files with 45 additions and 3 deletions

View File

@ -40,10 +40,12 @@ func parse(dir, pathPrefix string) []Link {
target = strings.TrimSpace(target) target = strings.TrimSpace(target)
target = strings.Replace(target, " ", "-", -1) target = strings.Replace(target, " ", "-", -1)
source := filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md")))
fmt.Printf(" '%s' => %s\n", text, target) fmt.Printf(" '%s' => %s\n", text, target)
links = append(links, Link{ links = append(links, Link{
Source: filepath.ToSlash(hugoPathTrim(trim(dir, pathPrefix, ".md"))), Source: UnicodeSanitize(source),
Target: target, Target: UnicodeSanitize(target),
Text: text, Text: text,
}) })
n++ n++

40
util.go
View File

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"path/filepath" "path/filepath"
"strings" "strings"
"unicode"
) )
func trim(source, prefix, suffix string) string { func trim(source, prefix, suffix string) string {
@ -28,6 +29,45 @@ func isInternal(link string) bool {
return !strings.HasPrefix(link, "http") return !strings.HasPrefix(link, "http")
} }
// ishex reports whether c is an ASCII hexadecimal digit: 0-9, a-f, or A-F.
// Adapted from https://golang.org/src/net/url/url.go
func ishex(c rune) bool {
	if c >= '0' && c <= '9' {
		return true
	}
	if c >= 'a' && c <= 'f' {
		return true
	}
	return c >= 'A' && c <= 'F'
}
// UnicodeSanitize sanitizes a string for use in Hugo URLs.
//
// A rune is kept when it is a Unicode letter, digit or mark, one of the
// punctuation characters . / \ _ # + ~, or a '%' that is immediately followed
// by two hexadecimal digits (an existing percent-escape). Every other rune is
// dropped. A run of dropped runes that contains a '-' or whitespace collapses
// into a single '-' placed before the next kept rune; such runs at the very
// start or very end of the input produce nothing.
//
// Ported from https://github.com/gohugoio/hugo/blob/93aad3c543828efca2adeb7f96cf50ae29878593/helpers/path.go#L94
func UnicodeSanitize(s string) string {
	runes := []rune(s)
	out := make([]rune, 0, len(runes))
	pendingHyphen := false

	for idx, ch := range runes {
		var keep bool
		switch ch {
		case '.', '/', '\\', '_', '#', '+', '~':
			keep = true
		case '%':
			// Only preserve '%' when it starts a complete %XX escape.
			keep = idx+2 < len(runes) && ishex(runes[idx+1]) && ishex(runes[idx+2])
		default:
			keep = unicode.IsLetter(ch) || unicode.IsDigit(ch) || unicode.IsMark(ch)
		}

		if !keep {
			// Separators are remembered, not emitted, so that repeated or
			// trailing ones never reach the output; leading ones are ignored
			// because nothing has been written yet.
			if len(out) > 0 && (ch == '-' || unicode.IsSpace(ch)) {
				pendingHyphen = true
			}
			continue
		}

		if pendingHyphen {
			out = append(out, '-')
			pendingHyphen = false
		}
		out = append(out, ch)
	}

	return string(out)
}
// filter out certain links (e.g. to media) // filter out certain links (e.g. to media)
func filter(links []Link) (res []Link) { func filter(links []Link) (res []Link) {
for _, l := range links { for _, l := range links {

View File

@ -47,7 +47,7 @@ func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res []
// check if page is private // check if page is private
if parsedPrivate, ok := frontmatter["draft"]; !ok || !parsedPrivate.(bool) { if parsedPrivate, ok := frontmatter["draft"]; !ok || !parsedPrivate.(bool) {
info, _ := os.Stat(s) info, _ := os.Stat(s)
adjustedPath := strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1) adjustedPath := UnicodeSanitize(strings.Replace(hugoPathTrim(trim(s, root, ".md")), " ", "-", -1))
i[adjustedPath] = Content{ i[adjustedPath] = Content{
LastModified: info.ModTime(), LastModified: info.ModTime(),
Title: title, Title: title,