website/themes/hugo-theme-wave/static/bower_components/lunr.js/lib/index.js
2020-01-12 23:32:32 +01:00

470 lines
14 KiB
JavaScript

/*!
* lunr.Index
* Copyright (C) @YEAR Oliver Nightingale
*/
/**
 * lunr.Index is an object that manages a search index. It holds the field
 * definitions, the pipeline, the document and token stores and the sorted set
 * of corpus tokens. It also provides the main user facing API for the library.
 *
 * A handler is registered for the 'add', 'remove' and 'update' events that
 * throws away the cached idf values, since those events change the contents
 * of the index.
 *
 * @constructor
 */
lunr.Index = function () {
  var index = this

  this._fields = []
  this._ref = 'id'
  this.pipeline = new lunr.Pipeline()
  this.documentStore = new lunr.Store()
  this.tokenStore = new lunr.TokenStore()
  this.corpusTokens = new lunr.SortedSet()
  this.eventEmitter = new lunr.EventEmitter()
  this.tokenizerFn = lunr.tokenizer

  this._idfCache = {}

  // any mutation of the index invalidates the previously computed idf values
  this.on('add', 'remove', 'update', function () {
    index._idfCache = {}
  })
}
/**
 * Bind a handler to events being emitted by the index.
 *
 * The handler can be bound to many events at the same time. All arguments are
 * forwarded, unchanged, to the `addListener` method of the index's event
 * emitter.
 *
 * @param {String} [eventName] The name(s) of events to bind the function to.
 * @param {Function} fn The handler to call when any of the events is emitted.
 * @returns The value returned by the event emitter's `addListener`.
 * @memberOf Index
 */
lunr.Index.prototype.on = function () {
  var emitter = this.eventEmitter

  return emitter.addListener.apply(emitter, arguments)
}
/**
 * Removes a handler from an event being emitted by the index.
 *
 * The call is delegated to the `removeListener` method of the index's event
 * emitter.
 *
 * @param {String} name The name of the event to remove the function from.
 * @param {Function} fn The handler to remove.
 * @returns The value returned by the event emitter's `removeListener`.
 * @memberOf Index
 */
lunr.Index.prototype.off = function (name, fn) {
  var emitter = this.eventEmitter

  return emitter.removeListener(name, fn)
}
/**
 * Loads a previously serialised index.
 *
 * Issues a warning if the index being imported was serialised
 * by a different version of lunr.
 *
 * The new index is created with `new this`, so the function must be called
 * as a method of the constructor (e.g. `lunr.Index.load(data)`).
 *
 * @param {Object} serialisedData The serialised index to load, in the shape
 * produced by `toJSON`.
 * @returns {lunr.Index}
 * @memberOf Index
 */
lunr.Index.load = function (serialisedData) {
  if (serialisedData.version !== lunr.version) {
    lunr.utils.warn('version mismatch: current ' + lunr.version + ' importing ' + serialisedData.version)
  }

  var idx = new this

  idx._fields = serialisedData.fields
  idx._ref = serialisedData.ref

  // The tokenizer function is stored in `tokenizerFn`, which is what `add`,
  // `search` and `toJSON` read. Assigning to `idx.tokenizer` instead would
  // shadow the prototype's `tokenizer` setter method and leave the default
  // tokenizer in use.
  idx.tokenizerFn = lunr.tokenizer.load(serialisedData.tokenizer)
  idx.documentStore = lunr.Store.load(serialisedData.documentStore)
  idx.tokenStore = lunr.TokenStore.load(serialisedData.tokenStore)
  idx.corpusTokens = lunr.SortedSet.load(serialisedData.corpusTokens)
  idx.pipeline = lunr.Pipeline.load(serialisedData.pipeline)

  return idx
}
/**
 * Adds a field to the list of fields that will be searchable within documents
 * in the index.
 *
 * An optional boost can be passed, via the options object, to affect how much
 * tokens in this field rank in search results. When no boost is given, or the
 * given boost is falsy, a boost of 1 is used.
 *
 * Fields should be added before any documents are added to the index, fields
 * that are added after documents are added to the index will only apply to new
 * documents added to the index.
 *
 * @param {String} fieldName The name of the field within the document that
 * should be indexed
 * @param {Object} [opts] Optional settings for the field.
 * @param {Number} [opts.boost] An optional boost that can be applied to terms
 * in this field.
 * @returns {lunr.Index}
 * @memberOf Index
 */
lunr.Index.prototype.field = function (fieldName, opts) {
  var options = opts || {}

  this._fields.push({
    name: fieldName,
    boost: options.boost || 1
  })

  return this
}
/**
 * Sets the property used to uniquely identify documents added to the index,
 * by default this property is 'id'.
 *
 * This should only be changed before adding documents to the index, changing
 * the ref property without resetting the index can lead to unexpected results.
 *
 * The value of ref can be of any type but it _must_ be stably comparable and
 * orderable.
 *
 * @param {String} refName The property to use to uniquely identify the
 * documents in the index.
 * @returns {lunr.Index}
 * @memberOf Index
 */
lunr.Index.prototype.ref = function (refName) {
  var index = this

  index._ref = refName

  return index
}
/**
 * Sets the tokenizer used for this index.
 *
 * By default the index will use the default tokenizer, lunr.tokenizer. The tokenizer
 * should only be changed before adding documents to the index. Changing the tokenizer
 * without re-building the index can lead to unexpected results.
 *
 * A warning is issued when the function has no `label`, or when its label is
 * not present in `lunr.tokenizer.registeredFunctions`. The function is used as
 * the tokenizer either way.
 *
 * @param {Function} fn The function to use as a tokenizer.
 * @returns {lunr.Index}
 * @memberOf Index
 */
lunr.Index.prototype.tokenizer = function (fn) {
  var label = fn.label

  if (!(label && (label in lunr.tokenizer.registeredFunctions))) {
    lunr.utils.warn('Function is not a registered tokenizer. This may cause problems when serialising the index')
  }

  this.tokenizerFn = fn

  return this
}
/**
 * Add a document to the index.
 *
 * Every configured field of the document is tokenized and run through the
 * index's pipeline. The resulting tokens are recorded in the corpus tokens,
 * the document store and the token store, after which the document will show
 * up in search results.
 *
 * For each distinct token a term frequency is stored. It is the sum, over all
 * fields that produced at least one token, of the share of the field's tokens
 * equal to the token multiplied by the field's boost.
 *
 * An 'add' event is emitted with the document that has been added and the index
 * the document has been added to. This event can be silenced by passing false
 * as the second argument to add.
 *
 * @param {Object} doc The document to add to the index.
 * @param {Boolean} emitEvent Whether or not to emit events, default true.
 * @memberOf Index
 */
lunr.Index.prototype.add = function (doc, emitEvent) {
  var index = this,
      tokensByField = {},
      uniqueTokens = new lunr.SortedSet,
      docRef = doc[this._ref]

  // counts how many entries of `tokens` are strictly equal to `token`
  var occurrences = function (tokens, token) {
    var count = 0

    for (var n = 0; n < tokens.length; n++) {
      if (tokens[n] === token) count++
    }

    return count
  }

  // tokenize every field, remembering the tokens per field and collecting
  // the distinct tokens of the whole document
  this._fields.forEach(function (field) {
    var tokens = index.pipeline.run(index.tokenizerFn(doc[field.name]))

    tokensByField[field.name] = tokens

    for (var t = 0; t < tokens.length; t++) {
      uniqueTokens.add(tokens[t])
      index.corpusTokens.add(tokens[t])
    }
  })

  this.documentStore.set(docRef, uniqueTokens)

  for (var i = 0; i < uniqueTokens.length; i++) {
    var token = uniqueTokens.elements[i],
        tf = 0

    for (var j = 0; j < this._fields.length; j++) {
      var field = this._fields[j],
          fieldTokens = tokensByField[field.name]

      // a field without tokens contributes nothing (and would divide by zero)
      if (!fieldTokens.length) continue

      tf += (occurrences(fieldTokens, token) / fieldTokens.length * field.boost)
    }

    this.tokenStore.add(token, { ref: docRef, tf: tf })
  }

  if (emitEvent === undefined || emitEvent) {
    this.eventEmitter.emit('add', doc, this)
  }
}
/**
 * Removes a document from the index.
 *
 * To make sure documents no longer show up in search results they can be
 * removed from the index using this method. Nothing happens, and no event is
 * emitted, when the document store has no entry for the document's ref.
 *
 * The document passed only needs to have the same ref property value as the
 * document that was added to the index, they could be completely different
 * objects.
 *
 * A 'remove' event is emitted with the document that has been removed and the index
 * the document has been removed from. This event can be silenced by passing false
 * as the second argument to remove.
 *
 * @param {Object} doc The document to remove from the index.
 * @param {Boolean} emitEvent Whether to emit remove events, defaults to true
 * @memberOf Index
 */
lunr.Index.prototype.remove = function (doc, emitEvent) {
  var index = this,
      docRef = doc[this._ref]

  if (!this.documentStore.has(docRef)) return

  // fetch the document's tokens before its entry is dropped from the store
  var tokens = this.documentStore.get(docRef)
  this.documentStore.remove(docRef)

  tokens.forEach(function (token) {
    index.tokenStore.remove(token, docRef)
  })

  if (emitEvent === undefined || emitEvent) {
    this.eventEmitter.emit('remove', doc, this)
  }
}
/**
 * Updates a document in the index.
 *
 * When a document contained within the index gets updated, fields changed,
 * added or removed, it should be updated in the index to make sure it is
 * correctly matched against search queries.
 *
 * This method is just a wrapper around `remove` and `add`
 *
 * An 'update' event is emitted with the document that has been updated and the index.
 * This event can be silenced by passing false as the second argument to update. Only
 * an update event will be fired, the 'add' and 'remove' events of the underlying calls
 * are silenced.
 *
 * @param {Object} doc The document to update in the index.
 * @param {Boolean} emitEvent Whether to emit update events, defaults to true
 * @see Index.prototype.remove
 * @see Index.prototype.add
 * @memberOf Index
 */
lunr.Index.prototype.update = function (doc, emitEvent) {
  // both underlying calls are made silently, only 'update' is ever emitted
  this.remove(doc, false)
  this.add(doc, false)

  if (emitEvent === undefined || emitEvent) {
    this.eventEmitter.emit('update', doc, this)
  }
}
/**
 * Calculates the inverse document frequency for a term within the index.
 *
 * The value is 1 for a term the token store counts no documents for, and
 * 1 + ln(number of documents / number of documents containing the term)
 * otherwise. Results are kept in the index's idf cache, keyed by the term
 * prefixed with "@".
 *
 * @param {String} term The term to calculate the idf of.
 * @returns {Number}
 * @private
 * @memberOf Index
 */
lunr.Index.prototype.idf = function (term) {
  var cache = this._idfCache,
      cacheKey = "@" + term

  if (Object.prototype.hasOwnProperty.call(cache, cacheKey)) {
    return cache[cacheKey]
  }

  var documentFrequency = this.tokenStore.count(term)

  var idf = documentFrequency > 0
    ? 1 + Math.log(this.documentStore.length / documentFrequency)
    : 1

  cache[cacheKey] = idf

  return idf
}
/**
 * Searches the index using the passed query.
 *
 * Queries should be a string, multiple words are allowed and will lead to an
 * AND based query, e.g. `idx.search('foo bar')` will run a search for
 * documents containing both 'foo' and 'bar'.
 *
 * All query tokens are passed through the same tokenizer and pipeline that
 * document tokens are passed through, so any language processing involved
 * will be run on every query term.
 *
 * Each query term is expanded, so that the term 'he' might be expanded to
 * 'hello' and 'help' if those terms were already included in the index.
 *
 * Matching documents are returned as an array of objects, each object contains
 * the matching document ref, as set for this index, and the similarity score
 * for this document against the query. The array is ordered by descending
 * score. An empty array is returned when none of the query tokens is present
 * in the token store.
 *
 * @param {String} query The query to search the index with.
 * @returns {Object[]}
 * @see Index.prototype.idf
 * @see Index.prototype.documentVector
 * @memberOf Index
 */
lunr.Index.prototype.search = function (query) {
  var index = this,
      queryTokens = this.pipeline.run(this.tokenizerFn(query)),
      queryVector = new lunr.Vector,
      matchesPerToken = [],
      totalBoost = this._fields.reduce(function (sum, field) { return sum + field.boost }, 0)

  var isIndexed = function (token) {
    return index.tokenStore.has(token)
  }

  if (!queryTokens.some(isIndexed)) return []

  queryTokens.forEach(function (token, position, tokens) {
    var tf = 1 / tokens.length * index._fields.length * totalBoost

    var matches = index.tokenStore.expand(token).reduce(function (collected, key) {
      var pos = index.corpusTokens.indexOf(key),
          idf = index.idf(key),
          similarityBoost = 1,
          refsForKey = new lunr.SortedSet

      // an expanded key that is not an exact match to the token has its
      // score penalised by how much longer the key is than the token
      if (key !== token) {
        similarityBoost = 1 / Math.log(Math.max(3, key.length - token.length))
      }

      // record the query tf-idf score for this key, the similarity boost
      // makes sure exact matches rank higher than expanded terms
      if (pos > -1) queryVector.insert(pos, tf * idf * similarityBoost)

      // gather the refs of every document containing this key, reading the
      // ref from the stored entry so that the type of the ref is preserved
      var entries = index.tokenStore.get(key)

      Object.keys(entries).forEach(function (entryKey) {
        refsForKey.add(entries[entryKey].ref)
      })

      return collected.union(refsForKey)
    }, new lunr.SortedSet)

    matchesPerToken.push(matches)
  })

  // AND semantics: keep only the documents matched by every query token
  var commonRefs = matchesPerToken.reduce(function (common, matches) {
    return common.intersect(matches)
  })

  var byDescendingScore = function (a, b) {
    return b.score - a.score
  }

  return commonRefs
    .map(function (ref) {
      return { ref: ref, score: queryVector.similarity(index.documentVector(ref)) }
    })
    .sort(byDescendingScore)
}
/**
 * Generates a vector containing all the tokens in the document matching the
 * passed documentRef.
 *
 * For every token stored for the document, the tf-idf score of that token is
 * inserted into the vector at the position the token has within the index's
 * corpus tokens.
 *
 * @param {Object} documentRef The ref to find the document with.
 * @returns {lunr.Vector}
 * @private
 * @memberOf Index
 */
lunr.Index.prototype.documentVector = function (documentRef) {
  var tokenSet = this.documentStore.get(documentRef),
      tokenCount = tokenSet.length,
      vector = new lunr.Vector,
      position = 0

  while (position < tokenCount) {
    var token = tokenSet.elements[position],
        entry = this.tokenStore.get(token)[documentRef],
        weight = entry.tf * this.idf(token)

    vector.insert(this.corpusTokens.indexOf(token), weight)

    position++
  }

  return vector
}
/**
 * Returns a representation of the index ready for serialisation.
 *
 * The tokenizer is represented by the `label` of the tokenizer function, the
 * stores, corpus tokens and pipeline by the result of their own `toJSON`.
 *
 * @returns {Object}
 * @memberOf Index
 */
lunr.Index.prototype.toJSON = function () {
  var serialised = {}

  serialised.version = lunr.version
  serialised.fields = this._fields
  serialised.ref = this._ref
  serialised.tokenizer = this.tokenizerFn.label
  serialised.documentStore = this.documentStore.toJSON()
  serialised.tokenStore = this.tokenStore.toJSON()
  serialised.corpusTokens = this.corpusTokens.toJSON()
  serialised.pipeline = this.pipeline.toJSON()

  return serialised
}
/**
 * Applies a plugin to the current index.
 *
 * A plugin is just a function that encapsulates some custom behaviour that
 * should be applied to the index, it can be used to customise or extend the
 * behaviour of the index in some way.
 *
 * The plugin function will be called with the index as its first argument,
 * followed by any additional arguments that were passed to use. The function
 * will also be called with the index as its context.
 *
 * Example:
 *
 *     var myPlugin = function (idx, arg1, arg2) {
 *       // `this` is the index to be extended
 *       // apply any extensions etc here.
 *     }
 *
 *     var idx = lunr(function () {
 *       this.use(myPlugin, 'arg1', 'arg2')
 *     })
 *
 * @param {Function} plugin The plugin to apply.
 * @memberOf Index
 */
lunr.Index.prototype.use = function (plugin) {
  var extraArgs = Array.prototype.slice.call(arguments, 1),
      pluginArgs = [this].concat(extraArgs)

  plugin.apply(this, pluginArgs)
}