/*!
 * lunr.tokenizer
 * Copyright (C) @YEAR Oliver Nightingale
 */

/**
 * A function for splitting a string into tokens ready to be inserted into
 * the search index. Uses `lunr.tokenizer.seperator` to split strings; change
 * the value of that property to change how strings are split into tokens.
 *
 * @module
 * @param {String} obj The string to convert into tokens
 * @see lunr.tokenizer.seperator
 * @returns {Array}
 */
lunr.tokenizer = function (obj) {
  if (!arguments.length || obj == null || obj == undefined) return []
  if (Array.isArray(obj)) return obj.map(function (t) { return lunr.utils.asString(t).toLowerCase() })

  return obj.toString().trim().toLowerCase().split(lunr.tokenizer.seperator)
}

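// Illustrative usage, kept as a comment so nothing runs at module load time: strings are
// lower-cased and split on the default separator (whitespace and hyphens), arrays are
// lower-cased element-wise, and null/undefined yield an empty array.
//
//   lunr.tokenizer('Foo-Bar baz')    // => ['foo', 'bar', 'baz']
//   lunr.tokenizer(['Foo', 'Bar'])   // => ['foo', 'bar']
//   lunr.tokenizer(null)             // => []
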
/**
 * The separator used to split a string into tokens. Override this property to change the
 * behaviour of `lunr.tokenizer` when tokenizing strings. By default this splits on
 * whitespace and hyphens.
 *
 * @static
 * @see lunr.tokenizer
 */
lunr.tokenizer.seperator = /[\s\-]+/

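// Illustrative sketch of overriding the separator, e.g. to also split on commas; the
// comma-aware pattern below is an assumption for demonstration, not part of this module.
//
//   lunr.tokenizer.seperator = /[\s\-,]+/
//   lunr.tokenizer('foo,bar baz')   // => ['foo', 'bar', 'baz']
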
/**
 * Loads a previously serialised tokenizer.
 *
 * A tokenizer function to be loaded must already be registered with
 * `lunr.tokenizer.registerFunction`. If the serialised tokenizer has not been registered
 * then an error will be thrown.
 *
 * @param {String} label The label of the serialised tokenizer.
 * @returns {Function}
 * @memberOf tokenizer
 */
lunr.tokenizer.load = function (label) {
  var fn = this.registeredFunctions[label]

  if (!fn) {
    throw new Error('Cannot load un-registered function: ' + label)
  }

  return fn
}

lunr.tokenizer.label = 'default'

lunr.tokenizer.registeredFunctions = {
  'default': lunr.tokenizer
}

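// Illustrative usage of lunr.tokenizer.load, shown as a comment: the default tokenizer is
// pre-registered under the label 'default', and loading an unknown label throws.
//
//   lunr.tokenizer.load('default')   // => lunr.tokenizer
//   lunr.tokenizer.load('missing')   // throws Error: Cannot load un-registered function: missing
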
/**
 * Register a tokenizer function.
 *
 * Functions that are used as tokenizers should be registered if they are to be used with a
 * serialised index.
 *
 * Registering a function does not add it to an index; functions must still be associated
 * with a specific index for them to be used when indexing and searching documents.
 *
 * @param {Function} fn The function to register.
 * @param {String} label The label to register this function with.
 * @memberOf tokenizer
 */
lunr.tokenizer.registerFunction = function (fn, label) {
  if (label in this.registeredFunctions) {
    lunr.utils.warn('Overwriting existing tokenizer: ' + label)
  }

  fn.label = label
  this.registeredFunctions[label] = fn
}
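
// Illustrative sketch of registering a custom tokenizer so it can be restored from a
// serialised index; `commaTokenizer` is a hypothetical example function, not part of lunr.
//
//   var commaTokenizer = function (obj) {
//     if (obj == null) return []
//     return obj.toString().trim().toLowerCase().split(/,\s*/)
//   }
//
//   lunr.tokenizer.registerFunction(commaTokenizer, 'comma')
//   lunr.tokenizer.load('comma')   // => commaTokenizer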