mirror of
https://github.com/squidfunk/mkdocs-material.git
synced 2024-06-14 11:52:32 +03:00
Fixed highlighting of tags
This commit is contained in:
parent
ee1496499a
commit
24a3be8f04
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
material/assets/stylesheets/extra.d35223bf.min.css
vendored
Normal file
1
material/assets/stylesheets/extra.d35223bf.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
1
material/assets/stylesheets/extra.d35223bf.min.css.map
Normal file
1
material/assets/stylesheets/extra.d35223bf.min.css.map
Normal file
File diff suppressed because one or more lines are too long
@ -211,7 +211,7 @@
|
||||
"base": base_url,
|
||||
"features": features,
|
||||
"translations": {},
|
||||
"search": "assets/javascripts/workers/search.208e55ea.min.js" | url
|
||||
"search": "assets/javascripts/workers/search.f5389c75.min.js" | url
|
||||
} -%}
|
||||
{%- if config.extra.version -%}
|
||||
{%- set _ = app.update({ "version": config.extra.version }) -%}
|
||||
@ -239,13 +239,13 @@
|
||||
</script>
|
||||
{% endblock %}
|
||||
{% block scripts %}
|
||||
<script src="{{ 'assets/javascripts/bundle.f1ef77e2.min.js' | url }}"></script>
|
||||
<script src="{{ 'assets/javascripts/bundle.ce0331ff.min.js' | url }}"></script>
|
||||
{% for path in config.extra_javascript %}
|
||||
<script src="{{ path | url }}"></script>
|
||||
{% endfor %}
|
||||
{% endblock %}
|
||||
{% if page.meta and page.meta.ᴴₒᴴₒᴴₒ %}
|
||||
<link rel="stylesheet" href="{{ 'assets/stylesheets/extra.b3906f4e.min.css' | url }}">
|
||||
<link rel="stylesheet" href="{{ 'assets/stylesheets/extra.d35223bf.min.css' | url }}">
|
||||
<script src="{{ 'assets/javascripts/extra/bundle.f719a234.min.js' | url }}" defer></script>
|
||||
{% endif %}
|
||||
</body>
|
||||
|
@ -30,6 +30,7 @@ import {
|
||||
Position,
|
||||
PositionTable,
|
||||
highlight,
|
||||
highlightAll,
|
||||
tokenize
|
||||
} from "../internal"
|
||||
import {
|
||||
@ -46,7 +47,9 @@ import {
|
||||
/**
|
||||
* Search item
|
||||
*/
|
||||
export interface SearchItem extends SearchDocument {
|
||||
export interface SearchItem
|
||||
extends SearchDocument
|
||||
{
|
||||
score: number /* Score (relevance) */
|
||||
terms: SearchQueryTerms /* Search query terms */
|
||||
}
|
||||
@ -213,6 +216,8 @@ export class Search {
|
||||
.reduce<SearchItem[]>((item, { ref, score, matchData }) => {
|
||||
let doc = this.map.get(ref)
|
||||
if (typeof doc !== "undefined") {
|
||||
|
||||
/* Shallow copy document */
|
||||
doc = { ...doc }
|
||||
if (doc.tags)
|
||||
doc.tags = [...doc.tags]
|
||||
@ -223,39 +228,29 @@ export class Search {
|
||||
Object.keys(matchData.metadata)
|
||||
)
|
||||
|
||||
// we must collect all positions for each term!
|
||||
// we now take the keys of the index
|
||||
/* Highlight matches in fields */
|
||||
for (const field of this.index.fields) {
|
||||
if (!(field in doc))
|
||||
if (typeof doc[field] === "undefined")
|
||||
continue
|
||||
|
||||
/* Collect matches */
|
||||
/* Collect positions from matches */
|
||||
const positions: Position[] = []
|
||||
for (const match of Object.values(matchData.metadata))
|
||||
if (field in match)
|
||||
if (typeof match[field] !== "undefined")
|
||||
positions.push(...match[field].position)
|
||||
|
||||
/* Skip field, if no highlighting is necessary */
|
||||
/* Skip highlighting, if no positions were collected */
|
||||
if (!positions.length)
|
||||
continue
|
||||
|
||||
// @ts-expect-error - @todo fix typings
|
||||
if (Array.isArray(doc[field])) {
|
||||
// @ts-expect-error - @todo fix typings
|
||||
for (let i = 0; i < doc[field].length; i++) {
|
||||
// @ts-expect-error - @todo fix typings
|
||||
doc[field][i] = highlight(doc[field][i],
|
||||
this.table.get([doc.location, field].join(":"))!,
|
||||
positions
|
||||
)
|
||||
}
|
||||
} else {
|
||||
// @ts-expect-error - @todo fix typings
|
||||
doc[field] = highlight(doc[field],
|
||||
this.table.get([doc.location, field].join(":"))!,
|
||||
positions
|
||||
)
|
||||
}
|
||||
/* Load table and determine highlighting method */
|
||||
const table = this.table.get([doc.location, field].join(":"))!
|
||||
const fn = Array.isArray(doc[field])
|
||||
? highlightAll
|
||||
: highlight
|
||||
|
||||
// @ts-expect-error - stop moaning, TypeScript!
|
||||
doc[field] = fn(doc[field], table, positions)
|
||||
}
|
||||
|
||||
/* Highlight title and text and apply post-query boosts */
|
||||
|
@ -41,15 +41,12 @@ type VisitorFn = (
|
||||
/**
|
||||
* Split a string using the given separator
|
||||
*
|
||||
* This function intentionally expects a visitor function argument, as opposed
|
||||
* to collecting and returning all sections, for better memory efficiency.
|
||||
*
|
||||
* @param value - String value
|
||||
* @param input - Input value
|
||||
* @param separator - Separator
|
||||
* @param fn - Visitor function
|
||||
*/
|
||||
export function split(
|
||||
value: string, separator: RegExp, fn: VisitorFn
|
||||
input: string, separator: RegExp, fn: VisitorFn
|
||||
): void {
|
||||
separator = new RegExp(separator, "g")
|
||||
|
||||
@ -57,10 +54,10 @@ export function split(
|
||||
let match: RegExpExecArray | null
|
||||
let index = 0
|
||||
do {
|
||||
match = separator.exec(value)
|
||||
match = separator.exec(input)
|
||||
|
||||
/* Emit non-empty range */
|
||||
const until = match?.index ?? value.length
|
||||
const until = match?.index ?? input.length
|
||||
if (index < until)
|
||||
fn(index, until)
|
||||
|
||||
|
@ -20,6 +20,24 @@
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Types
|
||||
* ------------------------------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Extraction type
|
||||
*
|
||||
* This type defines the possible values that are encoded into the first two
|
||||
* bits of a section that is part of the blocks of a tokenization table. There
|
||||
* are three types of interest: HTML opening and closing tags, as well as the
|
||||
* actual text content we need to extract for indexing.
|
||||
*/
|
||||
export const enum Extract {
|
||||
TAG_OPEN = 0, /* HTML opening tag */
|
||||
TEXT = 1, /* Text content */
|
||||
TAG_CLOSE = 2 /* HTML closing tag */
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Helper types
|
||||
* ------------------------------------------------------------------------- */
|
||||
@ -28,12 +46,12 @@
|
||||
* Visitor function
|
||||
*
|
||||
* @param block - Block index
|
||||
* @param operation - Operation index
|
||||
* @param type - Extraction type
|
||||
* @param start - Start offset
|
||||
* @param end - End offset
|
||||
*/
|
||||
type VisitorFn = (
|
||||
block: number, operation: number, start: number, end: number
|
||||
block: number, type: Extract, start: number, end: number
|
||||
) => void
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
@ -41,18 +59,18 @@ type VisitorFn = (
|
||||
* ------------------------------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Extract all non-HTML parts of a string
|
||||
* Split a string into markup and text sections
|
||||
*
|
||||
* This function preprocesses the given string by isolating all non-HTML parts,
|
||||
* in order to ensure that HTML tags are removed before indexing. Note that it
|
||||
* intentionally expects a visitor function argument, as opposed to collecting
|
||||
* and returning all sections, for better memory efficiency.
|
||||
* This function scans a string and divides it up into sections of markup and
|
||||
* text. For each section, it invokes the given visitor function with the block
|
||||
* index, extraction type, as well as start and end offsets. Using a visitor
|
||||
* function (= streaming data) is ideal for minimizing pressure on the GC.
|
||||
*
|
||||
* @param value - String value
|
||||
* @param input - Input value
|
||||
* @param fn - Visitor function
|
||||
*/
|
||||
export function extract(
|
||||
value: string, fn: VisitorFn
|
||||
input: string, fn: VisitorFn
|
||||
): void {
|
||||
|
||||
let block = 0 /* Current block */
|
||||
@ -60,22 +78,22 @@ export function extract(
|
||||
let end = 0 /* Current end offset */
|
||||
|
||||
/* Split string into sections */
|
||||
for (let stack = 0; end < value.length; end++) {
|
||||
for (let stack = 0; end < input.length; end++) {
|
||||
|
||||
/* Tag start after non-empty section */
|
||||
if (value.charAt(end) === "<" && end > start) {
|
||||
fn(block, 1, start, start = end)
|
||||
/* Opening tag after non-empty section */
|
||||
if (input.charAt(end) === "<" && end > start) {
|
||||
fn(block, Extract.TEXT, start, start = end)
|
||||
|
||||
/* Tag end */
|
||||
} else if (value.charAt(end) === ">") {
|
||||
if (value.charAt(start + 1) === "/") {
|
||||
/* Closing tag */
|
||||
} else if (input.charAt(end) === ">") {
|
||||
if (input.charAt(start + 1) === "/") {
|
||||
if (--stack === 0)
|
||||
fn(block++, 2, start, end + 1)
|
||||
fn(block++, Extract.TAG_CLOSE, start, end + 1)
|
||||
|
||||
/* Tag is not self-closing */
|
||||
} else if (value.charAt(end - 1) !== "/") {
|
||||
} else if (input.charAt(end - 1) !== "/") {
|
||||
if (stack++ === 0)
|
||||
fn(block, 0, start, end + 1)
|
||||
fn(block, Extract.TAG_OPEN, start, end + 1)
|
||||
}
|
||||
|
||||
/* New section */
|
||||
@ -85,5 +103,5 @@ export function extract(
|
||||
|
||||
/* Add trailing section */
|
||||
if (end > start)
|
||||
fn(block, 1, start, end)
|
||||
fn(block, Extract.TEXT, start, end)
|
||||
}
|
||||
|
@ -25,7 +25,7 @@
|
||||
* ------------------------------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Table for indexing
|
||||
* Position table
|
||||
*/
|
||||
export type PositionTable = number[][]
|
||||
|
||||
@ -46,21 +46,55 @@ export type Position = number
|
||||
* when executing the query. It then highlights all occurrences, and returns
|
||||
* their concatenation. In case of multiple blocks, two are returned.
|
||||
*
|
||||
* @param value - String value
|
||||
* @param input - Input value
|
||||
* @param table - Table for indexing
|
||||
* @param positions - Occurrences
|
||||
*
|
||||
* @returns Highlighted string value
|
||||
*/
|
||||
export function highlight(
|
||||
value: string, table: PositionTable, positions: Position[]
|
||||
input: string, table: PositionTable, positions: Position[]
|
||||
): string {
|
||||
return highlightAll([input], table, positions).pop()!
|
||||
}
|
||||
|
||||
/**
|
||||
* Highlight all occurrences in a set of strings
|
||||
*
|
||||
* @param inputs - Input values
|
||||
* @param table - Table for indexing
|
||||
* @param positions - Occurrences
|
||||
*
|
||||
* @returns Highlighted string values
|
||||
*/
|
||||
export function highlightAll(
|
||||
inputs: string[], table: PositionTable, positions: Position[]
|
||||
): string[] {
|
||||
|
||||
/* Map blocks to input values */
|
||||
const mapping = [0]
|
||||
for (let t = 1; t < table.length; t++) {
|
||||
const prev = table[t - 1]
|
||||
const next = table[t]
|
||||
|
||||
/* Check if table points to new block */
|
||||
const p = prev[prev.length - 1] >>> 2 & 0x3FF
|
||||
const q = next[0] >>> 12
|
||||
|
||||
/* Add block to mapping */
|
||||
mapping.push(+(p > q) + mapping[mapping.length - 1])
|
||||
}
|
||||
|
||||
/* Highlight strings one after another */
|
||||
return inputs.map((input, i) => {
|
||||
|
||||
/* Map occurrences to blocks */
|
||||
const blocks = new Map<number, number[]>()
|
||||
for (const i of positions.sort((a, b) => a - b)) {
|
||||
const block = i >>> 20
|
||||
const index = i & 0xFFFFF
|
||||
for (const p of positions.sort((a, b) => a - b)) {
|
||||
const index = p & 0xFFFFF
|
||||
const block = p >>> 20
|
||||
if (mapping[block] !== i)
|
||||
continue
|
||||
|
||||
/* Ensure presence of block group */
|
||||
let group = blocks.get(block)
|
||||
@ -71,6 +105,10 @@ export function highlight(
|
||||
group.push(index)
|
||||
}
|
||||
|
||||
/* Just return string, if no occurrences */
|
||||
if (blocks.size === 0)
|
||||
return input
|
||||
|
||||
/* Compute slices */
|
||||
const slices: string[] = []
|
||||
for (const [block, indexes] of blocks) {
|
||||
@ -81,18 +119,20 @@ export function highlight(
|
||||
const end = t[t.length - 1] >>> 12
|
||||
const length = t[t.length - 1] >>> 2 & 0x3FF
|
||||
|
||||
/* Extract and highlight slice/block */
|
||||
let slice = value.slice(start, end + length)
|
||||
for (const i of indexes.sort((a, b) => b - a)) {
|
||||
/* Extract and highlight slice */
|
||||
let slice = input.slice(start, end + length)
|
||||
for (const j of indexes.sort((a, b) => b - a)) {
|
||||
|
||||
/* Retrieve offset and length of match */
|
||||
const p = (t[i] >>> 12) - start
|
||||
const q = (t[i] >>> 2 & 0x3FF) + p
|
||||
const p = (t[j] >>> 12) - start
|
||||
const q = (t[j] >>> 2 & 0x3FF) + p
|
||||
|
||||
/* Wrap occurrence */
|
||||
slice = [
|
||||
slice.slice(0, p),
|
||||
"<mark>", slice.slice(p, q), "</mark>",
|
||||
"<mark>",
|
||||
slice.slice(p, q),
|
||||
"</mark>",
|
||||
slice.slice(q)
|
||||
].join("")
|
||||
}
|
||||
@ -102,6 +142,7 @@ export function highlight(
|
||||
break
|
||||
}
|
||||
|
||||
/* Return highlighted string value */
|
||||
/* Return highlighted slices */
|
||||
return slices.join("")
|
||||
})
|
||||
}
|
||||
|
@ -21,19 +21,29 @@
|
||||
*/
|
||||
|
||||
import { split } from "../_"
|
||||
import { extract } from "../extract"
|
||||
import {
|
||||
Extract,
|
||||
extract
|
||||
} from "../extract"
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Functions
|
||||
* ------------------------------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Split a string into tokens
|
||||
* Split a string or set of strings into tokens
|
||||
*
|
||||
* This tokenizer supersedes the default tokenizer that is provided by Lunr.js,
|
||||
* as it is aware of HTML tags and allows for multi-character splitting.
|
||||
*
|
||||
* @param input - String value or token
|
||||
* It takes the given inputs, splits each of them into markup and text sections,
|
||||
* tokenizes and segments (if necessary) each of them, and then indexes them in
|
||||
* a table by using a compact bit representation. Bitwise techniques are used
|
||||
* to write and read from the table during indexing and querying.
|
||||
*
|
||||
* @see https://bit.ly/3W3Xw4J - Search: better, faster, smaller
|
||||
*
|
||||
* @param input - Input value(s)
|
||||
*
|
||||
* @returns Tokens
|
||||
*/
|
||||
@ -41,67 +51,75 @@ export function tokenize(
|
||||
input?: string | string[]
|
||||
): lunr.Token[] {
|
||||
const tokens: lunr.Token[] = []
|
||||
if (typeof input === "undefined")
|
||||
return tokens
|
||||
|
||||
/**
|
||||
* Initialize segmenter, if loaded
|
||||
*
|
||||
* Note that doing this here is not ideal, but it's okay as we just test it
|
||||
* before bringing the new search implementation into its final shape.
|
||||
*/
|
||||
/* Initialize segmenter, if loaded */
|
||||
const segmenter = "TinySegmenter" in lunr
|
||||
? new lunr.TinySegmenter()
|
||||
: undefined
|
||||
|
||||
/* Tokenize an array of string values */
|
||||
if (Array.isArray(input)) {
|
||||
// @todo: handle multi-valued fields (e.g. tags)
|
||||
for (const value of input)
|
||||
tokens.push(...tokenize(value))
|
||||
|
||||
/* Tokenize a string value */
|
||||
} else if (input) {
|
||||
/* Tokenize strings one after another */
|
||||
const inputs = Array.isArray(input) ? input : [input]
|
||||
for (let i = 0; i < inputs.length; i++) {
|
||||
const table = lunr.tokenizer.table
|
||||
const total = table.length
|
||||
|
||||
/* Split string into sections and tokenize content blocks */
|
||||
extract(input, (block, type, start, end) => {
|
||||
if (type & 1) {
|
||||
const section = input.slice(start, end)
|
||||
extract(inputs[i], (block, type, start, end) => {
|
||||
block += total
|
||||
switch (type) {
|
||||
|
||||
/* Handle markup */
|
||||
case Extract.TAG_OPEN:
|
||||
case Extract.TAG_CLOSE:
|
||||
table[block] ||= []
|
||||
table[block].push(
|
||||
start << 12 |
|
||||
end - start << 2 |
|
||||
type
|
||||
)
|
||||
break
|
||||
|
||||
/* Handle text content */
|
||||
case Extract.TEXT:
|
||||
const section = inputs[i].slice(start, end)
|
||||
split(section, lunr.tokenizer.separator, (index, until) => {
|
||||
|
||||
/**
|
||||
* Apply segmenter after tokenization. Note that the segmenter will
|
||||
* also split words at word boundaries, which is not what we want, so
|
||||
* we need to check if we can somehow mitigate this behavior.
|
||||
* also split words at word boundaries, which is not what we want,
|
||||
* so we need to check if we can somehow mitigate this behavior.
|
||||
*/
|
||||
if (typeof segmenter !== "undefined") {
|
||||
const subsection = section.slice(index, until)
|
||||
if (/^[MHIK]$/.test(segmenter.ctype_(subsection))) {
|
||||
const segments = segmenter.segment(subsection)
|
||||
for (let i = 0, l = 0; i < segments.length; i++) {
|
||||
for (let s = 0, l = 0; s < segments.length; s++) {
|
||||
|
||||
/* Add block to table */
|
||||
/* Add block to section */
|
||||
table[block] ||= []
|
||||
table[block].push(
|
||||
start + index + l << 12 |
|
||||
segments[i].length << 2 |
|
||||
segments[s].length << 2 |
|
||||
type
|
||||
)
|
||||
|
||||
/* Add block as token */
|
||||
/* Add token with position */
|
||||
tokens.push(new lunr.Token(
|
||||
segments[i].toLowerCase(), {
|
||||
segments[s].toLowerCase(), {
|
||||
position: block << 20 | table[block].length - 1
|
||||
}
|
||||
))
|
||||
|
||||
/* Keep track of length */
|
||||
l += segments[i].length
|
||||
l += segments[s].length
|
||||
}
|
||||
return // combine segmenter with other approach!?
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
/* Add block to table */
|
||||
/* Add block to section */
|
||||
table[block] ||= []
|
||||
table[block].push(
|
||||
start + index << 12 |
|
||||
@ -109,22 +127,13 @@ export function tokenize(
|
||||
type
|
||||
)
|
||||
|
||||
/* Add block as token */
|
||||
/* Add token with position */
|
||||
tokens.push(new lunr.Token(
|
||||
section.slice(index, until).toLowerCase(), {
|
||||
position: block << 20 | table[block].length - 1
|
||||
}
|
||||
))
|
||||
})
|
||||
|
||||
/* Add non-content block to table */
|
||||
} else {
|
||||
table[block] ||= []
|
||||
table[block].push(
|
||||
start << 12 |
|
||||
end - start << 2 |
|
||||
type
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
6
typings/lunr/index.d.ts
vendored
6
typings/lunr/index.d.ts
vendored
@ -26,15 +26,17 @@ import lunr from "lunr"
|
||||
* Global types
|
||||
* ------------------------------------------------------------------------- */
|
||||
|
||||
type Fields = "text" | "title" | "tags"
|
||||
|
||||
declare global {
|
||||
namespace lunr {
|
||||
|
||||
/**
|
||||
* Index - expose inverted index
|
||||
*/
|
||||
interface Index {
|
||||
interface Index { // this is defined in the actual interface...
|
||||
invertedIndex: Record<string, unknown>
|
||||
fields: string[] // @todo: make typing generic?
|
||||
fields: Fields[]
|
||||
}
|
||||
|
||||
interface Builder {
|
||||
|
Loading…
Reference in New Issue
Block a user