2021-12-27 16:19:05 -05:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2022-04-02 11:53:14 -07:00
|
|
|
"net/url"
|
2021-12-27 16:19:05 -05:00
|
|
|
"path/filepath"
|
|
|
|
"strings"
|
2021-12-27 20:46:47 -05:00
|
|
|
"unicode"
|
2021-12-27 16:19:05 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
// trim removes suffix from the end of source and then prefix from the start
// of what remains. A prefix or suffix that is not present is left alone.
func trim(source, prefix, suffix string) string {
	withoutSuffix := strings.TrimSuffix(source, suffix)
	return strings.TrimPrefix(withoutSuffix, prefix)
}
|
|
|
|
|
|
|
|
// hugoPathTrim strips a trailing "/index" from source and then a trailing
// "_index" from the result. Each suffix is removed at most once.
func hugoPathTrim(source string) string {
	trimmed := strings.TrimSuffix(source, "/index")
	trimmed = strings.TrimSuffix(trimmed, "_index")
	return trimmed
}
|
|
|
|
|
|
|
|
func processTarget(source string) string {
|
|
|
|
if !isInternal(source) {
|
|
|
|
return source
|
|
|
|
}
|
|
|
|
if strings.HasPrefix(source, "/") {
|
|
|
|
return strings.TrimSuffix(source, ".md")
|
|
|
|
}
|
2022-08-23 23:27:12 -05:00
|
|
|
res := strings.Split(source, "#")[0]
|
|
|
|
res = "/" + strings.TrimSuffix(strings.TrimSuffix(res, ".html"), ".md")
|
2022-04-02 12:51:56 -07:00
|
|
|
res, _ = url.PathUnescape(res)
|
2022-04-02 11:53:14 -07:00
|
|
|
res = strings.TrimSpace(res)
|
2022-04-02 17:43:20 -07:00
|
|
|
res = UnicodeSanitize(res)
|
|
|
|
return strings.ReplaceAll(url.PathEscape(res), "%2F", "/")
|
2022-04-02 11:53:14 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
func processSource(source string) string {
|
|
|
|
res := filepath.ToSlash(hugoPathTrim(source))
|
|
|
|
res = UnicodeSanitize(res)
|
|
|
|
return strings.ReplaceAll(url.PathEscape(res), "%2F", "/")
|
2021-12-27 16:19:05 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// isInternal reports whether link should be treated as internal, which here
// means it does not begin with the (case-sensitive) text "http".
func isInternal(link string) bool {
	if strings.HasPrefix(link, "http") {
		return false
	}
	return true
}
|
|
|
|
|
2021-12-27 20:46:47 -05:00
|
|
|
// ishex reports whether c is an ASCII hexadecimal digit (0-9, a-f, or A-F).
// From https://golang.org/src/net/url/url.go
func ishex(c rune) bool {
	isDecimal := c >= '0' && c <= '9'
	isLowerHex := c >= 'a' && c <= 'f'
	isUpperHex := c >= 'A' && c <= 'F'
	return isDecimal || isLowerHex || isUpperHex
}
|
|
|
|
|
|
|
|
// UnicodeSanitize sanitizes string to be used in Hugo URL's
|
|
|
|
// from https://github.com/gohugoio/hugo/blob/93aad3c543828efca2adeb7f96cf50ae29878593/helpers/path.go#L94
|
|
|
|
func UnicodeSanitize(s string) string {
|
|
|
|
source := []rune(s)
|
|
|
|
target := make([]rune, 0, len(source))
|
|
|
|
var prependHyphen bool
|
|
|
|
|
|
|
|
for i, r := range source {
|
|
|
|
isAllowed := r == '.' || r == '/' || r == '\\' || r == '_' || r == '#' || r == '+' || r == '~'
|
|
|
|
isAllowed = isAllowed || unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r)
|
|
|
|
isAllowed = isAllowed || (r == '%' && i+2 < len(source) && ishex(source[i+1]) && ishex(source[i+2]))
|
|
|
|
|
|
|
|
if isAllowed {
|
|
|
|
if prependHyphen {
|
|
|
|
target = append(target, '-')
|
|
|
|
prependHyphen = false
|
|
|
|
}
|
|
|
|
target = append(target, r)
|
|
|
|
} else if len(target) > 0 && (r == '-' || unicode.IsSpace(r)) {
|
|
|
|
prependHyphen = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return string(target)
|
|
|
|
}
|
|
|
|
|
2021-12-27 16:19:05 -05:00
|
|
|
// filter out certain links (e.g. to media)
|
|
|
|
func filter(links []Link) (res []Link) {
|
|
|
|
for _, l := range links {
|
|
|
|
// filter external and non-md
|
|
|
|
isMarkdown := filepath.Ext(l.Target) == "" || filepath.Ext(l.Target) == ".md"
|
|
|
|
if isInternal(l.Target) && isMarkdown {
|
|
|
|
res = append(res, l)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fmt.Printf("Removed %d external and non-markdown links\n", len(links)-len(res))
|
|
|
|
return res
|
|
|
|
}
|