mirror of
https://github.com/squidfunk/mkdocs-material.git
synced 2024-06-14 11:52:32 +03:00
Fixed highlighting of tags
This commit is contained in:
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
material/assets/stylesheets/extra.d35223bf.min.css
vendored
Normal file
1
material/assets/stylesheets/extra.d35223bf.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
1
material/assets/stylesheets/extra.d35223bf.min.css.map
Normal file
1
material/assets/stylesheets/extra.d35223bf.min.css.map
Normal file
File diff suppressed because one or more lines are too long
@@ -211,7 +211,7 @@
|
|||||||
"base": base_url,
|
"base": base_url,
|
||||||
"features": features,
|
"features": features,
|
||||||
"translations": {},
|
"translations": {},
|
||||||
"search": "assets/javascripts/workers/search.208e55ea.min.js" | url
|
"search": "assets/javascripts/workers/search.f5389c75.min.js" | url
|
||||||
} -%}
|
} -%}
|
||||||
{%- if config.extra.version -%}
|
{%- if config.extra.version -%}
|
||||||
{%- set _ = app.update({ "version": config.extra.version }) -%}
|
{%- set _ = app.update({ "version": config.extra.version }) -%}
|
||||||
@@ -239,13 +239,13 @@
|
|||||||
</script>
|
</script>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
{% block scripts %}
|
{% block scripts %}
|
||||||
<script src="{{ 'assets/javascripts/bundle.f1ef77e2.min.js' | url }}"></script>
|
<script src="{{ 'assets/javascripts/bundle.ce0331ff.min.js' | url }}"></script>
|
||||||
{% for path in config.extra_javascript %}
|
{% for path in config.extra_javascript %}
|
||||||
<script src="{{ path | url }}"></script>
|
<script src="{{ path | url }}"></script>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
{% if page.meta and page.meta.ᴴₒᴴₒᴴₒ %}
|
{% if page.meta and page.meta.ᴴₒᴴₒᴴₒ %}
|
||||||
<link rel="stylesheet" href="{{ 'assets/stylesheets/extra.b3906f4e.min.css' | url }}">
|
<link rel="stylesheet" href="{{ 'assets/stylesheets/extra.d35223bf.min.css' | url }}">
|
||||||
<script src="{{ 'assets/javascripts/extra/bundle.f719a234.min.js' | url }}" defer></script>
|
<script src="{{ 'assets/javascripts/extra/bundle.f719a234.min.js' | url }}" defer></script>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</body>
|
</body>
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ import {
|
|||||||
Position,
|
Position,
|
||||||
PositionTable,
|
PositionTable,
|
||||||
highlight,
|
highlight,
|
||||||
|
highlightAll,
|
||||||
tokenize
|
tokenize
|
||||||
} from "../internal"
|
} from "../internal"
|
||||||
import {
|
import {
|
||||||
@@ -46,7 +47,9 @@ import {
|
|||||||
/**
|
/**
|
||||||
* Search item
|
* Search item
|
||||||
*/
|
*/
|
||||||
export interface SearchItem extends SearchDocument {
|
export interface SearchItem
|
||||||
|
extends SearchDocument
|
||||||
|
{
|
||||||
score: number /* Score (relevance) */
|
score: number /* Score (relevance) */
|
||||||
terms: SearchQueryTerms /* Search query terms */
|
terms: SearchQueryTerms /* Search query terms */
|
||||||
}
|
}
|
||||||
@@ -213,6 +216,8 @@ export class Search {
|
|||||||
.reduce<SearchItem[]>((item, { ref, score, matchData }) => {
|
.reduce<SearchItem[]>((item, { ref, score, matchData }) => {
|
||||||
let doc = this.map.get(ref)
|
let doc = this.map.get(ref)
|
||||||
if (typeof doc !== "undefined") {
|
if (typeof doc !== "undefined") {
|
||||||
|
|
||||||
|
/* Shallow copy document */
|
||||||
doc = { ...doc }
|
doc = { ...doc }
|
||||||
if (doc.tags)
|
if (doc.tags)
|
||||||
doc.tags = [...doc.tags]
|
doc.tags = [...doc.tags]
|
||||||
@@ -223,39 +228,29 @@ export class Search {
|
|||||||
Object.keys(matchData.metadata)
|
Object.keys(matchData.metadata)
|
||||||
)
|
)
|
||||||
|
|
||||||
// we must collect all positions for each term!
|
/* Highlight matches in fields */
|
||||||
// we now take the keys of the index
|
|
||||||
for (const field of this.index.fields) {
|
for (const field of this.index.fields) {
|
||||||
if (!(field in doc))
|
if (typeof doc[field] === "undefined")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
/* Collect matches */
|
/* Collect positions from matches */
|
||||||
const positions: Position[] = []
|
const positions: Position[] = []
|
||||||
for (const match of Object.values(matchData.metadata))
|
for (const match of Object.values(matchData.metadata))
|
||||||
if (field in match)
|
if (typeof match[field] !== "undefined")
|
||||||
positions.push(...match[field].position)
|
positions.push(...match[field].position)
|
||||||
|
|
||||||
/* Skip field, if no highlighting is necessary */
|
/* Skip highlighting, if no positions were collected */
|
||||||
if (!positions.length)
|
if (!positions.length)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
// @ts-expect-error - @todo fix typings
|
/* Load table and determine highlighting method */
|
||||||
if (Array.isArray(doc[field])) {
|
const table = this.table.get([doc.location, field].join(":"))!
|
||||||
// @ts-expect-error - @todo fix typings
|
const fn = Array.isArray(doc[field])
|
||||||
for (let i = 0; i < doc[field].length; i++) {
|
? highlightAll
|
||||||
// @ts-expect-error - @todo fix typings
|
: highlight
|
||||||
doc[field][i] = highlight(doc[field][i],
|
|
||||||
this.table.get([doc.location, field].join(":"))!,
|
// @ts-expect-error - stop moaning, TypeScript!
|
||||||
positions
|
doc[field] = fn(doc[field], table, positions)
|
||||||
)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// @ts-expect-error - @todo fix typings
|
|
||||||
doc[field] = highlight(doc[field],
|
|
||||||
this.table.get([doc.location, field].join(":"))!,
|
|
||||||
positions
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Highlight title and text and apply post-query boosts */
|
/* Highlight title and text and apply post-query boosts */
|
||||||
|
|||||||
@@ -41,15 +41,12 @@ type VisitorFn = (
|
|||||||
/**
|
/**
|
||||||
* Split a string using the given separator
|
* Split a string using the given separator
|
||||||
*
|
*
|
||||||
* This function intentionally expects a visitor function argument, as opposed
|
* @param input - Input value
|
||||||
* to collecting and returning all sections, for better memory efficiency.
|
|
||||||
*
|
|
||||||
* @param value - String value
|
|
||||||
* @param separator - Separator
|
* @param separator - Separator
|
||||||
* @param fn - Visitor function
|
* @param fn - Visitor function
|
||||||
*/
|
*/
|
||||||
export function split(
|
export function split(
|
||||||
value: string, separator: RegExp, fn: VisitorFn
|
input: string, separator: RegExp, fn: VisitorFn
|
||||||
): void {
|
): void {
|
||||||
separator = new RegExp(separator, "g")
|
separator = new RegExp(separator, "g")
|
||||||
|
|
||||||
@@ -57,10 +54,10 @@ export function split(
|
|||||||
let match: RegExpExecArray | null
|
let match: RegExpExecArray | null
|
||||||
let index = 0
|
let index = 0
|
||||||
do {
|
do {
|
||||||
match = separator.exec(value)
|
match = separator.exec(input)
|
||||||
|
|
||||||
/* Emit non-empty range */
|
/* Emit non-empty range */
|
||||||
const until = match?.index ?? value.length
|
const until = match?.index ?? input.length
|
||||||
if (index < until)
|
if (index < until)
|
||||||
fn(index, until)
|
fn(index, until)
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,24 @@
|
|||||||
* IN THE SOFTWARE.
|
* IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------------
|
||||||
|
* Types
|
||||||
|
* ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extraction type
|
||||||
|
*
|
||||||
|
* This type defines the possible values that are encoded into the first two
|
||||||
|
* bits of a section that is part of the blocks of a tokenization table. There
|
||||||
|
* are three types of interest: HTML opening and closing tags, as well as the
|
||||||
|
* actual text content we need to extract for indexing.
|
||||||
|
*/
|
||||||
|
export const enum Extract {
|
||||||
|
TAG_OPEN = 0, /* HTML opening tag */
|
||||||
|
TEXT = 1, /* Text content */
|
||||||
|
TAG_CLOSE = 2 /* HTML closing tag */
|
||||||
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------------
|
/* ----------------------------------------------------------------------------
|
||||||
* Helper types
|
* Helper types
|
||||||
* ------------------------------------------------------------------------- */
|
* ------------------------------------------------------------------------- */
|
||||||
@@ -28,12 +46,12 @@
|
|||||||
* Visitor function
|
* Visitor function
|
||||||
*
|
*
|
||||||
* @param block - Block index
|
* @param block - Block index
|
||||||
* @param operation - Operation index
|
* @param type - Extraction type
|
||||||
* @param start - Start offset
|
* @param start - Start offset
|
||||||
* @param end - End offset
|
* @param end - End offset
|
||||||
*/
|
*/
|
||||||
type VisitorFn = (
|
type VisitorFn = (
|
||||||
block: number, operation: number, start: number, end: number
|
block: number, type: Extract, start: number, end: number
|
||||||
) => void
|
) => void
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------------
|
/* ----------------------------------------------------------------------------
|
||||||
@@ -41,18 +59,18 @@ type VisitorFn = (
|
|||||||
* ------------------------------------------------------------------------- */
|
* ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract all non-HTML parts of a string
|
* Split a string into markup and text sections
|
||||||
*
|
*
|
||||||
* This function preprocesses the given string by isolating all non-HTML parts,
|
* This function scans a string and divides it up into sections of markup and
|
||||||
* in order to ensure that HTML tags are removed before indexing. Note that it
|
* text. For each section, it invokes the given visitor function with the block
|
||||||
* intentionally expects a visitor function argument, as opposed to collecting
|
* index, extraction type, as well as start and end offsets. Using a visitor
|
||||||
* and returning all sections, for better memory efficiency.
|
* function (= streaming data) is ideal for minimizing pressure on the GC.
|
||||||
*
|
*
|
||||||
* @param value - String value
|
* @param input - Input value
|
||||||
* @param fn - Visitor function
|
* @param fn - Visitor function
|
||||||
*/
|
*/
|
||||||
export function extract(
|
export function extract(
|
||||||
value: string, fn: VisitorFn
|
input: string, fn: VisitorFn
|
||||||
): void {
|
): void {
|
||||||
|
|
||||||
let block = 0 /* Current block */
|
let block = 0 /* Current block */
|
||||||
@@ -60,22 +78,22 @@ export function extract(
|
|||||||
let end = 0 /* Current end offset */
|
let end = 0 /* Current end offset */
|
||||||
|
|
||||||
/* Split string into sections */
|
/* Split string into sections */
|
||||||
for (let stack = 0; end < value.length; end++) {
|
for (let stack = 0; end < input.length; end++) {
|
||||||
|
|
||||||
/* Tag start after non-empty section */
|
/* Opening tag after non-empty section */
|
||||||
if (value.charAt(end) === "<" && end > start) {
|
if (input.charAt(end) === "<" && end > start) {
|
||||||
fn(block, 1, start, start = end)
|
fn(block, Extract.TEXT, start, start = end)
|
||||||
|
|
||||||
/* Tag end */
|
/* Closing tag */
|
||||||
} else if (value.charAt(end) === ">") {
|
} else if (input.charAt(end) === ">") {
|
||||||
if (value.charAt(start + 1) === "/") {
|
if (input.charAt(start + 1) === "/") {
|
||||||
if (--stack === 0)
|
if (--stack === 0)
|
||||||
fn(block++, 2, start, end + 1)
|
fn(block++, Extract.TAG_CLOSE, start, end + 1)
|
||||||
|
|
||||||
/* Tag is not self-closing */
|
/* Tag is not self-closing */
|
||||||
} else if (value.charAt(end - 1) !== "/") {
|
} else if (input.charAt(end - 1) !== "/") {
|
||||||
if (stack++ === 0)
|
if (stack++ === 0)
|
||||||
fn(block, 0, start, end + 1)
|
fn(block, Extract.TAG_OPEN, start, end + 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* New section */
|
/* New section */
|
||||||
@@ -85,5 +103,5 @@ export function extract(
|
|||||||
|
|
||||||
/* Add trailing section */
|
/* Add trailing section */
|
||||||
if (end > start)
|
if (end > start)
|
||||||
fn(block, 1, start, end)
|
fn(block, Extract.TEXT, start, end)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,7 +25,7 @@
|
|||||||
* ------------------------------------------------------------------------- */
|
* ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Table for indexing
|
* Position table
|
||||||
*/
|
*/
|
||||||
export type PositionTable = number[][]
|
export type PositionTable = number[][]
|
||||||
|
|
||||||
@@ -46,62 +46,103 @@ export type Position = number
|
|||||||
* when executing the query. It then highlights all occurrences, and returns
|
* when executing the query. It then highlights all occurrences, and returns
|
||||||
* their concatenation. In case of multiple blocks, two are returned.
|
* their concatenation. In case of multiple blocks, two are returned.
|
||||||
*
|
*
|
||||||
* @param value - String value
|
* @param input - Input value
|
||||||
* @param table - Table for indexing
|
* @param table - Table for indexing
|
||||||
* @param positions - Occurrences
|
* @param positions - Occurrences
|
||||||
*
|
*
|
||||||
* @returns Highlighted string value
|
* @returns Highlighted string value
|
||||||
*/
|
*/
|
||||||
export function highlight(
|
export function highlight(
|
||||||
value: string, table: PositionTable, positions: Position[]
|
input: string, table: PositionTable, positions: Position[]
|
||||||
): string {
|
): string {
|
||||||
|
return highlightAll([input], table, positions).pop()!
|
||||||
|
}
|
||||||
|
|
||||||
/* Map occurrences to blocks */
|
/**
|
||||||
const blocks = new Map<number, number[]>()
|
* Highlight all occurrences in a set of strings
|
||||||
for (const i of positions.sort((a, b) => a - b)) {
|
*
|
||||||
const block = i >>> 20
|
* @param inputs - Input values
|
||||||
const index = i & 0xFFFFF
|
* @param table - Table for indexing
|
||||||
|
* @param positions - Occurrences
|
||||||
|
*
|
||||||
|
* @returns Highlighted string values
|
||||||
|
*/
|
||||||
|
export function highlightAll(
|
||||||
|
inputs: string[], table: PositionTable, positions: Position[]
|
||||||
|
): string[] {
|
||||||
|
|
||||||
/* Ensure presence of block group */
|
/* Map blocks to input values */
|
||||||
let group = blocks.get(block)
|
const mapping = [0]
|
||||||
if (typeof group === "undefined")
|
for (let t = 1; t < table.length; t++) {
|
||||||
blocks.set(block, group = [])
|
const prev = table[t - 1]
|
||||||
|
const next = table[t]
|
||||||
|
|
||||||
/* Add index to group */
|
/* Check if table points to new block */
|
||||||
group.push(index)
|
const p = prev[prev.length - 1] >>> 2 & 0x3FF
|
||||||
|
const q = next[0] >>> 12
|
||||||
|
|
||||||
|
/* Add block to mapping */
|
||||||
|
mapping.push(+(p > q) + mapping[mapping.length - 1])
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compute slices */
|
/* Highlight strings one after another */
|
||||||
const slices: string[] = []
|
return inputs.map((input, i) => {
|
||||||
for (const [block, indexes] of blocks) {
|
|
||||||
const t = table[block]
|
|
||||||
|
|
||||||
/* Extract positions and length */
|
/* Map occurrences to blocks */
|
||||||
const start = t[0] >>> 12
|
const blocks = new Map<number, number[]>()
|
||||||
const end = t[t.length - 1] >>> 12
|
for (const p of positions.sort((a, b) => a - b)) {
|
||||||
const length = t[t.length - 1] >>> 2 & 0x3FF
|
const index = p & 0xFFFFF
|
||||||
|
const block = p >>> 20
|
||||||
|
if (mapping[block] !== i)
|
||||||
|
continue
|
||||||
|
|
||||||
/* Extract and highlight slice/block */
|
/* Ensure presence of block group */
|
||||||
let slice = value.slice(start, end + length)
|
let group = blocks.get(block)
|
||||||
for (const i of indexes.sort((a, b) => b - a)) {
|
if (typeof group === "undefined")
|
||||||
|
blocks.set(block, group = [])
|
||||||
|
|
||||||
/* Retrieve offset and length of match */
|
/* Add index to group */
|
||||||
const p = (t[i] >>> 12) - start
|
group.push(index)
|
||||||
const q = (t[i] >>> 2 & 0x3FF) + p
|
|
||||||
|
|
||||||
/* Wrap occurrence */
|
|
||||||
slice = [
|
|
||||||
slice.slice(0, p),
|
|
||||||
"<mark>", slice.slice(p, q), "</mark>",
|
|
||||||
slice.slice(q)
|
|
||||||
].join("")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Append slice and abort if we have two */
|
/* Just return string, if no occurrences */
|
||||||
if (slices.push(slice) === 2)
|
if (blocks.size === 0)
|
||||||
break
|
return input
|
||||||
}
|
|
||||||
|
|
||||||
/* Return highlighted string value */
|
/* Compute slices */
|
||||||
return slices.join("")
|
const slices: string[] = []
|
||||||
|
for (const [block, indexes] of blocks) {
|
||||||
|
const t = table[block]
|
||||||
|
|
||||||
|
/* Extract positions and length */
|
||||||
|
const start = t[0] >>> 12
|
||||||
|
const end = t[t.length - 1] >>> 12
|
||||||
|
const length = t[t.length - 1] >>> 2 & 0x3FF
|
||||||
|
|
||||||
|
/* Extract and highlight slice */
|
||||||
|
let slice = input.slice(start, end + length)
|
||||||
|
for (const j of indexes.sort((a, b) => b - a)) {
|
||||||
|
|
||||||
|
/* Retrieve offset and length of match */
|
||||||
|
const p = (t[j] >>> 12) - start
|
||||||
|
const q = (t[j] >>> 2 & 0x3FF) + p
|
||||||
|
|
||||||
|
/* Wrap occurrence */
|
||||||
|
slice = [
|
||||||
|
slice.slice(0, p),
|
||||||
|
"<mark>",
|
||||||
|
slice.slice(p, q),
|
||||||
|
"</mark>",
|
||||||
|
slice.slice(q)
|
||||||
|
].join("")
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Append slice and abort if we have two */
|
||||||
|
if (slices.push(slice) === 2)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return highlighted slices */
|
||||||
|
return slices.join("")
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,19 +21,29 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { split } from "../_"
|
import { split } from "../_"
|
||||||
import { extract } from "../extract"
|
import {
|
||||||
|
Extract,
|
||||||
|
extract
|
||||||
|
} from "../extract"
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------------
|
/* ----------------------------------------------------------------------------
|
||||||
* Functions
|
* Functions
|
||||||
* ------------------------------------------------------------------------- */
|
* ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Split a string into tokens
|
* Split a string or set of strings into tokens
|
||||||
*
|
*
|
||||||
* This tokenizer supersedes the default tokenizer that is provided by Lunr.js,
|
* This tokenizer supersedes the default tokenizer that is provided by Lunr.js,
|
||||||
* as it is aware of HTML tags and allows for multi-character splitting.
|
* as it is aware of HTML tags and allows for multi-character splitting.
|
||||||
*
|
*
|
||||||
* @param input - String value or token
|
* It takes the given inputs, splits each of them into markup and text sections,
|
||||||
|
* tokenizes and segments (if necessary) each of them, and then indexes them in
|
||||||
|
* a table by using a compact bit representation. Bitwise techniques are used
|
||||||
|
* to write and read from the table during indexing and querying.
|
||||||
|
*
|
||||||
|
* @see https://bit.ly/3W3Xw4J - Search: better, faster, smaller
|
||||||
|
*
|
||||||
|
* @param input - Input value(s)
|
||||||
*
|
*
|
||||||
* @returns Tokens
|
* @returns Tokens
|
||||||
*/
|
*/
|
||||||
@@ -41,90 +51,89 @@ export function tokenize(
|
|||||||
input?: string | string[]
|
input?: string | string[]
|
||||||
): lunr.Token[] {
|
): lunr.Token[] {
|
||||||
const tokens: lunr.Token[] = []
|
const tokens: lunr.Token[] = []
|
||||||
|
if (typeof input === "undefined")
|
||||||
|
return tokens
|
||||||
|
|
||||||
/**
|
/* Initialize segmenter, if loaded */
|
||||||
* Initialize segmenter, if loaded
|
|
||||||
*
|
|
||||||
* Note that doing this here is not ideal, but it's okay as we just test it
|
|
||||||
* before bringing the new search implementation in its final shape.
|
|
||||||
*/
|
|
||||||
const segmenter = "TinySegmenter" in lunr
|
const segmenter = "TinySegmenter" in lunr
|
||||||
? new lunr.TinySegmenter()
|
? new lunr.TinySegmenter()
|
||||||
: undefined
|
: undefined
|
||||||
|
|
||||||
/* Tokenize an array of string values */
|
/* Tokenize strings one after another */
|
||||||
if (Array.isArray(input)) {
|
const inputs = Array.isArray(input) ? input : [input]
|
||||||
// @todo: handle multi-valued fields (e.g. tags)
|
for (let i = 0; i < inputs.length; i++) {
|
||||||
for (const value of input)
|
|
||||||
tokens.push(...tokenize(value))
|
|
||||||
|
|
||||||
/* Tokenize a string value */
|
|
||||||
} else if (input) {
|
|
||||||
const table = lunr.tokenizer.table
|
const table = lunr.tokenizer.table
|
||||||
|
const total = table.length
|
||||||
|
|
||||||
/* Split string into sections and tokenize content blocks */
|
/* Split string into sections and tokenize content blocks */
|
||||||
extract(input, (block, type, start, end) => {
|
extract(inputs[i], (block, type, start, end) => {
|
||||||
if (type & 1) {
|
block += total
|
||||||
const section = input.slice(start, end)
|
switch (type) {
|
||||||
split(section, lunr.tokenizer.separator, (index, until) => {
|
|
||||||
|
|
||||||
/**
|
/* Handle markup */
|
||||||
* Apply segmenter after tokenization. Note that the segmenter will
|
case Extract.TAG_OPEN:
|
||||||
* also split words at word boundaries, which is not what we want, so
|
case Extract.TAG_CLOSE:
|
||||||
* we need to check if we can somehow mitigate this behavior.
|
|
||||||
*/
|
|
||||||
if (typeof segmenter !== "undefined") {
|
|
||||||
const subsection = section.slice(index, until)
|
|
||||||
if (/^[MHIK]$/.test(segmenter.ctype_(subsection))) {
|
|
||||||
const segments = segmenter.segment(subsection)
|
|
||||||
for (let i = 0, l = 0; i < segments.length; i++) {
|
|
||||||
|
|
||||||
/* Add block to table */
|
|
||||||
table[block] ||= []
|
|
||||||
table[block].push(
|
|
||||||
start + index + l << 12 |
|
|
||||||
segments[i].length << 2 |
|
|
||||||
type
|
|
||||||
)
|
|
||||||
|
|
||||||
/* Add block as token */
|
|
||||||
tokens.push(new lunr.Token(
|
|
||||||
segments[i].toLowerCase(), {
|
|
||||||
position: block << 20 | table[block].length - 1
|
|
||||||
}
|
|
||||||
))
|
|
||||||
|
|
||||||
/* Keep track of length */
|
|
||||||
l += segments[i].length
|
|
||||||
}
|
|
||||||
return // combine segmenter with other approach!?
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add block to table */
|
|
||||||
table[block] ||= []
|
table[block] ||= []
|
||||||
table[block].push(
|
table[block].push(
|
||||||
start + index << 12 |
|
start << 12 |
|
||||||
until - index << 2 |
|
end - start << 2 |
|
||||||
type
|
type
|
||||||
)
|
)
|
||||||
|
break
|
||||||
|
|
||||||
/* Add block as token */
|
/* Handle text content */
|
||||||
tokens.push(new lunr.Token(
|
case Extract.TEXT:
|
||||||
section.slice(index, until).toLowerCase(), {
|
const section = inputs[i].slice(start, end)
|
||||||
position: block << 20 | table[block].length - 1
|
split(section, lunr.tokenizer.separator, (index, until) => {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply segmenter after tokenization. Note that the segmenter will
|
||||||
|
* also split words at word boundaries, which is not what we want,
|
||||||
|
* so we need to check if we can somehow mitigate this behavior.
|
||||||
|
*/
|
||||||
|
if (typeof segmenter !== "undefined") {
|
||||||
|
const subsection = section.slice(index, until)
|
||||||
|
if (/^[MHIK]$/.test(segmenter.ctype_(subsection))) {
|
||||||
|
const segments = segmenter.segment(subsection)
|
||||||
|
for (let s = 0, l = 0; s < segments.length; s++) {
|
||||||
|
|
||||||
|
/* Add block to section */
|
||||||
|
table[block] ||= []
|
||||||
|
table[block].push(
|
||||||
|
start + index + l << 12 |
|
||||||
|
segments[s].length << 2 |
|
||||||
|
type
|
||||||
|
)
|
||||||
|
|
||||||
|
/* Add token with position */
|
||||||
|
tokens.push(new lunr.Token(
|
||||||
|
segments[s].toLowerCase(), {
|
||||||
|
position: block << 20 | table[block].length - 1
|
||||||
|
}
|
||||||
|
))
|
||||||
|
|
||||||
|
/* Keep track of length */
|
||||||
|
l += segments[s].length
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
))
|
|
||||||
})
|
|
||||||
|
|
||||||
/* Add non-content block to table */
|
/* Add block to section */
|
||||||
} else {
|
table[block] ||= []
|
||||||
table[block] ||= []
|
table[block].push(
|
||||||
table[block].push(
|
start + index << 12 |
|
||||||
start << 12 |
|
until - index << 2 |
|
||||||
end - start << 2 |
|
type
|
||||||
type
|
)
|
||||||
)
|
|
||||||
|
/* Add token with position */
|
||||||
|
tokens.push(new lunr.Token(
|
||||||
|
section.slice(index, until).toLowerCase(), {
|
||||||
|
position: block << 20 | table[block].length - 1
|
||||||
|
}
|
||||||
|
))
|
||||||
|
})
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
6
typings/lunr/index.d.ts
vendored
6
typings/lunr/index.d.ts
vendored
@@ -26,15 +26,17 @@ import lunr from "lunr"
|
|||||||
* Global types
|
* Global types
|
||||||
* ------------------------------------------------------------------------- */
|
* ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
type Fields = "text" | "title" | "tags"
|
||||||
|
|
||||||
declare global {
|
declare global {
|
||||||
namespace lunr {
|
namespace lunr {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Index - expose inverted index
|
* Index - expose inverted index
|
||||||
*/
|
*/
|
||||||
interface Index {
|
interface Index { // this is defined in the actual interface...
|
||||||
invertedIndex: Record<string, unknown>
|
invertedIndex: Record<string, unknown>
|
||||||
fields: string[] // @todo: make typing generic?
|
fields: Fields[]
|
||||||
}
|
}
|
||||||
|
|
||||||
interface Builder {
|
interface Builder {
|
||||||
|
|||||||
Reference in New Issue
Block a user