import SearchableMap from './SearchableMap/SearchableMap.js'

const OR = 'or'
const AND = 'and'

/**
 * MiniSearch is the main entrypoint class, and represents a full-text search
 * engine.
 *
 * @example
 * const documents = [
 *   {
 *     id: 1,
 *     title: 'Moby Dick',
 *     text: 'Call me Ishmael. Some years ago...',
 *     category: 'fiction'
 *   },
 *   {
 *     id: 2,
 *     title: 'Zen and the Art of Motorcycle Maintenance',
 *     text: 'I can see by my watch...',
 *     category: 'fiction'
 *   },
 *   {
 *     id: 3,
 *     title: 'Neuromancer',
 *     text: 'The sky above the port was...',
 *     category: 'fiction'
 *   },
 *   {
 *     id: 4,
 *     title: 'Zen and the Art of Archery',
 *     text: 'At first sight it must seem...',
 *     category: 'non-fiction'
 *   },
 *   // ...and more
 * ]
 *
 * // Create a search engine that indexes the 'title' and 'text' fields for
 * // full-text search. Search results will include 'title' and 'category'
 * // (plus the id field, which is always stored and returned)
 * const miniSearch = new MiniSearch({
 *   fields: ['title', 'text'],
 *   storeFields: ['title', 'category']
 * })
 *
 * // Add documents to the index
 * miniSearch.addAll(documents)
 *
 * // Search for documents:
 * let results = miniSearch.search('zen art motorcycle')
 * // => [
 * //   { id: 2, title: 'Zen and the Art of Motorcycle Maintenance', category: 'fiction', score: 2.77258 },
 * //   { id: 4, title: 'Zen and the Art of Archery', category: 'non-fiction', score: 1.38629 }
 * // ]
 */
class MiniSearch {
  /**
   * @callback MiniSearch~extractField
   * @param {Object} document - A document object
   * @param {string} fieldName - Name of the field to extract
   * @return string - Value of the field
   */

  /**
   * @callback MiniSearch~tokenize
   * @param {string} text - Text to tokenize
   * @param {?string} fieldName - Name of the field to tokenize
   * @return string[] - Tokenized terms
   */

  /**
   * @callback MiniSearch~processTerm
   * @param {string} term - Term to process
   * @param {?string} fieldName - Name of the field the term comes from
   * @return string|null|undefined|false - Processed term, or a falsy value to discard the term
   */

  /**
   * @param {Object} options - Configuration options
   * @param {Array} options.fields - Fields to be indexed. Required.
   * @param {string} [options.idField='id'] - ID field, uniquely identifying a document
   * @param {Array} [options.storeFields] - Fields to store, so that search results include them. By default none, so results only contain the id field.
   * @param {MiniSearch~extractField} [options.extractField] - Function used to get the value of a field in a document
   * @param {MiniSearch~tokenize} [options.tokenize] - Function used to split a field into individual terms
   * @param {MiniSearch~processTerm} [options.processTerm] - Function used to process a term before indexing it or searching
   * @param {Object} [options.searchOptions] - Default search options (see the `search` method for details)
   *
   * @example
   * // Create a search engine that indexes the 'title' and 'text' fields of your
   * // documents:
   * const miniSearch = new MiniSearch({ fields: ['title', 'text'] })
   *
   * @example
   * // Your documents are assumed to include a unique 'id' field, but if you want
   * // to use a different field for document identification, you can set the
   * // 'idField' option:
   * const miniSearch = new MiniSearch({ idField: 'key', fields: ['title', 'text'] })
   *
   * @example
   * // The full set of options (here with their default value) is:
   * const miniSearch = new MiniSearch({
   *   // idField: field that uniquely identifies a document
   *   idField: 'id',
   *
   *   // extractField: function used to get the value of a field in a document.
   *   // By default, it assumes the document is a flat object with field names as
   *   // property keys and field values as string property values, but custom logic
   *   // can be implemented by setting this option to a custom extractor function.
   *   extractField: (document, fieldName) => document[fieldName],
   *
   *   // tokenize: function used to split fields into individual terms. By
   *   // default, it is also used to tokenize search queries, unless a specific
   *   // `tokenize` search option is supplied. When tokenizing an indexed field,
   *   // the field name is passed as the second argument.
   *   tokenize: (string, _fieldName) => string.split(SPACE_OR_PUNCTUATION),
   *
   *   // processTerm: function used to process each tokenized term before
   *   // indexing. It can be used for stemming and normalization. Return a falsy
   *   // value in order to discard a term. By default, it is also used to process
   *   // search queries, unless a specific `processTerm` option is supplied as a
   *   // search option. When processing a term from an indexed field, the field
   *   // name is passed as the second argument.
   *   processTerm: (term, _fieldName) => term.toLowerCase(),
   *
   *   // searchOptions: default search options, see the `search` method for
   *   // details
   *   searchOptions: undefined,
   *
   *   // fields: document fields to be indexed. Mandatory, but not set by default
   *   fields: undefined,
   *
   *   // storeFields: document fields to be stored and returned as part of the
   *   // search results.
   *   storeFields: []
   * })
   */
  constructor (options = {}) {
    /** @private */
    this._options = { ...defaultOptions, ...options }

    this._options.searchOptions = { ...defaultSearchOptions, ...(this._options.searchOptions || {}) }

    const { fields } = this._options

    if (fields == null) {
      throw new Error('MiniSearch: option "fields" must be provided')
    }

    /** @private */
    this._index = new SearchableMap()

    /** @private */
    this._documentCount = 0

    /** @private */
    this._documentIds = {}

    /** @private */
    this._fieldIds = {}

    /** @private */
    this._fieldLength = {}

    /** @private */
    this._averageFieldLength = {}

    /** @private */
    this._nextId = 0

    /** @private */
    this._storedFields = {}

    addFields(this, fields)
  }

  /**
   * Adds a document to the index
   *
   * @param {Object} document - the document to be indexed
   */
  add (document) {
    const { extractField, tokenize, processTerm, fields, idField } = this._options
    const id = extractField(document, idField)
    if (id == null) {
      throw new Error(`MiniSearch: document does not have ID field "${idField}"`)
    }
    const shortDocumentId = addDocumentId(this, id)
    saveStoredFields(this, shortDocumentId, document)

    fields.forEach(field => {
      const fieldValue = extractField(document, field)
      if (fieldValue == null) { return }

      const tokens = tokenize(fieldValue.toString(), field)

      addFieldLength(this, shortDocumentId, this._fieldIds[field], this.documentCount - 1, tokens.length)

      tokens.forEach(term => {
        const processedTerm = processTerm(term, field)
        if (isTruthy(processedTerm)) {
          addTerm(this, this._fieldIds[field], shortDocumentId, processedTerm)
        }
      })
    })
  }

  /**
   * Adds all the given documents to the index
   *
   * @param {Object[]} documents - an array of documents to be indexed
   */
  addAll (documents) {
    documents.forEach(document => this.add(document))
  }

  /**
   * Adds all the given documents to the index asynchronously.
   *
   * Returns a promise that resolves to `undefined` when the indexing is done.
   * This method is useful when indexing many documents, to avoid blocking the
   * main thread. The indexing is performed asynchronously and in chunks.
   *
   * @param {Object[]} documents - an array of documents to be indexed
   * @param {Object} [options] - Configuration options
   * @param {number} [options.chunkSize] - Size of the document chunks indexed, 10 by default
   * @return {Promise} A promise resolving to `undefined` when the indexing is done
   */
  addAllAsync (documents, options = {}) {
    const { chunkSize = 10 } = options
    const acc = { chunk: [], promise: Promise.resolve(null) }

    const { chunk, promise } = documents.reduce(({ chunk, promise }, document, i) => {
      chunk.push(document)
      if ((i + 1) % chunkSize === 0) {
        return { chunk: [], promise: promise.then(() => this.addAll(chunk)) }
      } else {
        return { chunk, promise }
      }
    }, acc)

    return promise.then(() => this.addAll(chunk))
  }
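
  // Illustrative sketch (not part of the library): indexing a large collection
  // in chunks without blocking the main thread. The `documents` array and the
  // chunk size below are assumptions for the example.
  //
  //   miniSearch.addAllAsync(documents, { chunkSize: 50 })
  //     .then(() => console.log('indexing complete'))
  //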
  /**
   * Removes the given document from the index.
   *
   * The document to delete must NOT have changed between indexing and deletion,
   * otherwise the index will be corrupted. Therefore, when reindexing a document
   * after a change, the correct order of operations is:
   *
   * 1. remove old version
   * 2. apply changes
   * 3. index new version
   *
   * @param {Object} document - the document to be removed
   */
  remove (document) {
    const { tokenize, processTerm, extractField, fields, idField } = this._options
    const id = extractField(document, idField)
    if (id == null) {
      throw new Error(`MiniSearch: document does not have ID field "${idField}"`)
    }

    const [shortDocumentId] = Object.entries(this._documentIds)
      .find(([_, longId]) => id === longId) || []

    if (shortDocumentId == null) {
      throw new Error(`MiniSearch: cannot remove document with ID ${id}: it is not in the index`)
    }

    fields.forEach(field => {
      const fieldValue = extractField(document, field)
      if (fieldValue == null) { return }

      const tokens = tokenize(fieldValue.toString(), field)

      tokens.forEach(term => {
        const processedTerm = processTerm(term, field)
        if (isTruthy(processedTerm)) {
          removeTerm(this, this._fieldIds[field], shortDocumentId, processedTerm)
        }
      })
    })

    delete this._storedFields[shortDocumentId]
    delete this._documentIds[shortDocumentId]
    this._documentCount -= 1
  }

  /**
   * Removes all the given documents from the index. If called with no arguments,
   * it removes _all_ documents from the index.
   *
   * @param {Array} [documents] - the documents to be removed
   */
  removeAll (documents) {
    if (arguments.length === 0) {
      this._index = new SearchableMap()
      this._documentCount = 0
      this._documentIds = {}
      this._fieldLength = {}
      this._averageFieldLength = {}
      this._storedFields = {}
      this._nextId = 0
    } else {
      documents.forEach(document => this.remove(document))
    }
  }
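
  // Illustrative sketch (not part of the library): reindexing a changed
  // document in the order described above. `selectedDocument` and the edited
  // field are assumptions for the example.
  //
  //   miniSearch.remove(selectedDocument)     // 1. remove old version
  //   selectedDocument.title = 'New title'    // 2. apply changes
  //   miniSearch.add(selectedDocument)        // 3. index new version
  //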
  /**
   * @callback MiniSearch~prefixFn
   * @param {string} term - Search term
   * @param {number} i - Index of the term in the query terms array
   * @param {string[]} terms - Array of all query terms
   * @return boolean - `true` to perform prefix search, `false` to not perform it
   */

  /**
   * @callback MiniSearch~fuzzyFn
   * @param {string} term - Search term
   * @param {number} i - Index of the search term in the tokenized search query
   * @param {string[]} terms - Array of all query terms
   * @return number|false - Maximum edit distance, or `false` to not perform fuzzy search
   */

  /**
   * @callback MiniSearch~filter
   * @param {Object} result - A search result
   * @return boolean - `true` to keep the result, `false` to filter it out
   */

  /**
   * Search for documents matching the given search query.
   *
   * The result is a list of scored document IDs matching the query, sorted by
   * descending score, and each including data about which terms were matched and
   * in which fields.
   *
   * @param {string} queryString - Query string to search for
   * @param {Object} [options] - Search options. Each option, if not given, defaults to the corresponding value of `searchOptions` given to the constructor, or to the library default.
   * @param {Array} [options.fields] - Fields to search in. If omitted, all fields are searched
   * @param {Object} [options.boost] - Key-value object of boosting values for fields
   * @param {boolean|MiniSearch~prefixFn} [options.prefix=false] - Whether to perform prefix search. Value can be a boolean, or a function computing the boolean from each tokenized and processed query term. If a function is given, it is called with the following arguments: `term: string` - the query term; `i: number` - the term index in the query terms; `terms: Array` - the array of query terms.
   * @param {number|false|MiniSearch~fuzzyFn} [options.fuzzy=false] - If set to a number greater than or equal to 1, it performs fuzzy search within a maximum edit distance equal to that value. If set to a number less than 1, it performs fuzzy search with a maximum edit distance equal to the term length times the value, rounded to the nearest integer. If set to a function, the function is called for each tokenized and processed query term and is expected to return the maximum edit distance as a number, or a falsy value if fuzzy search should not be performed. If a function is given, it is called with the following arguments: `term: string` - the query term; `i: number` - the term index in the query terms; `terms: Array` - the array of query terms.
   * @param {string} [options.combineWith='OR'] - How to combine term queries (it can be 'OR' or 'AND')
   * @param {MiniSearch~tokenize} [options.tokenize] - Function used to tokenize the search query. It defaults to the same tokenizer used for indexing.
   * @param {MiniSearch~processTerm} [options.processTerm] - Function used to process each search term. Return a falsy value to discard a term. Defaults to the same function used to process terms upon indexing.
   * @param {MiniSearch~filter} [options.filter] - Function used to filter search results, for example on the basis of stored fields
   * @return {Array<{ id: any, score: number, match: Object }>} A sorted array of scored document IDs matching the search
   *
   * @example
   * // Search for "zen art motorcycle" with default options: terms have to match
   * // exactly, and individual terms are joined with OR
   * miniSearch.search('zen art motorcycle')
   * // => [ { id: 2, score: 2.77258, match: { ... } }, { id: 4, score: 1.38629, match: { ... } } ]
   *
   * @example
   * // Search only in the 'title' field
   * miniSearch.search('zen', { fields: ['title'] })
   *
   * @example
   * // Boost a field
   * miniSearch.search('zen', { boost: { title: 2 } })
   *
   * @example
   * // Search for "moto neuro" with prefix search (it will match documents
   * // containing terms that start with "moto" or "neuro")
   * miniSearch.search('moto neuro', { prefix: true })
   *
   * @example
   * // Search for "ismael" with fuzzy search (it will match documents containing
   * // terms similar to "ismael", with a maximum edit distance of 0.2 * term.length,
   * // rounded to the nearest integer)
   * miniSearch.search('ismael', { fuzzy: 0.2 })
   *
   * @example
   * // Mix of exact match, prefix search, and fuzzy search
   * miniSearch.search('ismael mob', {
   *   prefix: true,
   *   fuzzy: 0.2
   * })
   *
   * @example
   * // Perform fuzzy and prefix search depending on the search term. Here,
   * // prefix and fuzzy search are performed only on terms longer than 3 characters:
   * miniSearch.search('ismael mob', {
   *   prefix: term => term.length > 3,
   *   fuzzy: term => term.length > 3 ? 0.2 : null
   * })
   *
   * @example
   * // Combine search terms with AND (to match only documents that contain both
   * // "motorcycle" and "art")
   * miniSearch.search('motorcycle art', { combineWith: 'AND' })
   *
   * @example
   * // Filter only results in the 'fiction' category (assuming that 'category'
   * // is a stored field)
   * miniSearch.search('motorcycle art', {
   *   filter: (result) => result.category === 'fiction'
   * })
   */
  search (queryString, options = {}) {
    const { tokenize, processTerm, searchOptions } = this._options
    options = { tokenize, processTerm, ...searchOptions, ...options }
    const { tokenize: searchTokenize, processTerm: searchProcessTerm } = options
    const queries = searchTokenize(queryString)
      .map((term) => searchProcessTerm(term))
      .filter(isTruthy)
      .map(termToQuery(options))
    const results = queries.map(query => this.executeQuery(query, options))
    const combinedResults = this.combineResults(results, options.combineWith)

    return Object.entries(combinedResults)
      .reduce((results, [docId, { score, match, terms }]) => {
        const result = {
          id: this._documentIds[docId],
          terms: uniq(terms),
          score,
          match
        }
        Object.assign(result, this._storedFields[docId])
        if (options.filter == null || options.filter(result)) {
          results.push(result)
        }
        return results
      }, [])
      .sort(({ score: a }, { score: b }) => a < b ? 1 : -1)
  }
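
  // Illustrative sketch (not part of the library): default search options given
  // to the constructor apply to every `search` call, and per-call options
  // override them. The option values below are arbitrary examples.
  //
  //   const miniSearch = new MiniSearch({
  //     fields: ['title', 'text'],
  //     searchOptions: { boost: { title: 2 }, fuzzy: 0.2 }
  //   })
  //   miniSearch.search('zen')                    // uses the defaults above
  //   miniSearch.search('zen', { fuzzy: false })  // overrides fuzzy for this call
  //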
  /**
   * Provide suggestions for the given search query
   *
   * The result is a list of suggested modified search queries, derived from the
   * given search query, each with a relevance score, sorted by descending score.
   *
   * @param {string} queryString - Query string to be expanded into suggestions
   * @param {Object} [options] - Search options. The supported options and default values are the same as for the `search` method, except that by default prefix search is performed on the last term in the query.
   * @return {Array<{ suggestion: string, score: number }>} An array of suggestions, sorted by descending relevance score.
   *
   * @example
   * // Get suggestions for 'neuro':
   * miniSearch.autoSuggest('neuro')
   * // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 0.46240 } ]
   *
   * @example
   * // Get suggestions for 'zen ar':
   * miniSearch.autoSuggest('zen ar')
   * // => [
   * //   { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 },
   * //   { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 }
   * // ]
   *
   * @example
   * // Correct spelling mistakes using fuzzy search:
   * miniSearch.autoSuggest('neromancer', { fuzzy: 0.2 })
   * // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 1.03998 } ]
   *
   * @example
   * // Get suggestions for 'zen ar', but only within the 'fiction' category
   * // (assuming that 'category' is a stored field):
   * miniSearch.autoSuggest('zen ar', {
   *   filter: (result) => result.category === 'fiction'
   * })
   * // => [
   * //   { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 },
   * //   { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 }
   * // ]
   */
  autoSuggest (queryString, options = {}) {
    options = { ...defaultAutoSuggestOptions, ...options }
    const suggestions = this.search(queryString, options).reduce((suggestions, { score, terms }) => {
      const phrase = terms.join(' ')
      if (suggestions[phrase] == null) {
        suggestions[phrase] = { score, terms, count: 1 }
      } else {
        suggestions[phrase].score += score
        suggestions[phrase].count += 1
      }
      return suggestions
    }, {})
    return Object.entries(suggestions)
      .map(([suggestion, { score, terms, count }]) => ({ suggestion, terms, score: score / count }))
      .sort(({ score: a }, { score: b }) => a < b ? 1 : -1)
  }

  /**
   * Number of documents in the index
   *
   * @type {number}
   */
  get documentCount () {
    return this._documentCount
  }

  /**
   * Deserializes a JSON index (serialized with `miniSearch.toJSON()`) and
   * instantiates a MiniSearch instance. It should be given the same options
   * originally used when serializing the index.
   *
   * @example
   * // If the index was serialized with:
   * let miniSearch = new MiniSearch({ fields: ['title', 'text'] })
   * miniSearch.addAll(documents)
   *
   * const json = JSON.stringify(miniSearch)
   * // It can later be deserialized like this:
   * miniSearch = MiniSearch.loadJSON(json, { fields: ['title', 'text'] })
   *
   * @param {string} json - JSON-serialized index
   * @param {Object} options - configuration options, same as the constructor
   * @return {MiniSearch} an instance of MiniSearch
   */
  static loadJSON (json, options) {
    if (options == null) {
      throw new Error('MiniSearch: loadJSON should be given the same options used when serializing the index')
    }
    return MiniSearch.loadJS(JSON.parse(json), options)
  }

  /**
   * Get the default value of an option. It will throw an error if no option with
   * the given name exists.
   *
   * @param {string} optionName - name of the option
   * @return {any} the default value of the given option
   *
   * @example
   * // Get default tokenizer
   * MiniSearch.getDefault('tokenize')
   *
   * @example
   * // Get default term processor
   * MiniSearch.getDefault('processTerm')
   *
   * @example
   * // Unknown options will throw an error
   * MiniSearch.getDefault('notExisting')
   * // => throws 'MiniSearch: unknown option "notExisting"'
   */
  static getDefault (optionName) {
    if (defaultOptions.hasOwnProperty(optionName)) {
      return defaultOptions[optionName]
    } else {
      throw new Error(`MiniSearch: unknown option "${optionName}"`)
    }
  }

  /**
   * @private
   */
  static loadJS (js, options = {}) {
    const { index, documentCount, nextId, documentIds, fieldIds, fieldLength, averageFieldLength, storedFields } = js
    const miniSearch = new MiniSearch(options)
    miniSearch._index = new SearchableMap(index._tree, index._prefix)
    miniSearch._documentCount = documentCount
    miniSearch._nextId = nextId
    miniSearch._documentIds = documentIds
    miniSearch._fieldIds = fieldIds
    miniSearch._fieldLength = fieldLength
    miniSearch._averageFieldLength = averageFieldLength
    miniSearch._storedFields = storedFields || {}
    return miniSearch
  }

  /**
   * @private
   * @ignore
   */
  executeQuery (query, options = {}) {
    options = { ...this._options.searchOptions, ...options }

    // Give every searched field a boost factor, defaulting to 1
    const boosts = (options.fields || this._options.fields).reduce((boosts, field) =>
      ({ ...boosts, [field]: getOwnProperty(boosts, field) || 1 }), options.boost || {})

    const { boostDocument, weights: { fuzzy: fuzzyWeight = 0.9, prefix: prefixWeight = 0.75 } } = options

    const exactMatch = termResults(this, query.term, boosts, boostDocument, this._index.get(query.term))

    if (!query.fuzzy && !query.prefix) { return exactMatch }

    const results = [exactMatch]

    if (query.prefix) {
      this._index.atPrefix(query.term).forEach((term, data) => {
        // Down-weight prefix matches proportionally to how much longer they are than the query term
        const weightedDistance = (0.3 * (term.length - query.term.length)) / term.length
        results.push(termResults(this, term, boosts, boostDocument, data, prefixWeight, weightedDistance))
      })
    }

    if (query.fuzzy) {
      const maxDistance = query.fuzzy < 1 ? Math.round(query.term.length * query.fuzzy) : query.fuzzy
      Object.entries(this._index.fuzzyGet(query.term, maxDistance)).forEach(([term, [data, distance]]) => {
        // Down-weight fuzzy matches proportionally to their relative edit distance
        const weightedDistance = distance / term.length
        results.push(termResults(this, term, boosts, boostDocument, data, fuzzyWeight, weightedDistance))
      })
    }

    return results.reduce(combinators[OR], {})
  }

  /**
   * @private
   * @ignore
   */
  combineResults (results, combineWith = OR) {
    if (results.length === 0) { return {} }
    const operator = combineWith.toLowerCase()
    return results.reduce(combinators[operator], null)
  }

  /**
   * Allows serialization of the index to JSON, to possibly store it and later
   * deserialize it with `MiniSearch.loadJSON`.
   *
   * Normally one does not call this method directly, but rather calls the
   * standard JavaScript `JSON.stringify()` passing the `MiniSearch` instance,
   * and JavaScript internally calls this method. Upon deserialization, one
   * must pass to `loadJSON` the same options used to create the original
   * instance that was serialized.
   *
   * @example
   * // Serialize the index:
   * let miniSearch = new MiniSearch({ fields: ['title', 'text'] })
   * miniSearch.addAll(documents)
   * const json = JSON.stringify(miniSearch)
   *
   * // Later, to deserialize it:
   * miniSearch = MiniSearch.loadJSON(json, { fields: ['title', 'text'] })
   *
   * @return {Object} the serializable representation of the search index
   */
  toJSON () {
    return {
      index: this._index,
      documentCount: this._documentCount,
      nextId: this._nextId,
      documentIds: this._documentIds,
      fieldIds: this._fieldIds,
      fieldLength: this._fieldLength,
      averageFieldLength: this._averageFieldLength,
      storedFields: this._storedFields
    }
  }
}

MiniSearch.SearchableMap = SearchableMap

// Index data for a term has the shape { [fieldId]: { df, ds: { [shortDocumentId]: termFrequency } } }
const addTerm = function (self, fieldId, documentId, term) {
  self._index.update(term, indexData => {
    indexData = indexData || {}
    const fieldIndex = indexData[fieldId] || { df: 0, ds: {} }
    if (fieldIndex.ds[documentId] == null) { fieldIndex.df += 1 }
    fieldIndex.ds[documentId] = (fieldIndex.ds[documentId] || 0) + 1
    return { ...indexData, [fieldId]: fieldIndex }
  })
}

const removeTerm = function (self, fieldId, documentId, term) {
  if (!self._index.has(term)) {
    warnDocumentChanged(self, documentId, fieldId, term)
    return
  }
  self._index.update(term, indexData => {
    const fieldIndex = indexData[fieldId]
    if (fieldIndex == null || fieldIndex.ds[documentId] == null) {
      warnDocumentChanged(self, documentId, fieldId, term)
      return indexData
    }
    if (fieldIndex.ds[documentId] <= 1) {
      if (fieldIndex.df <= 1) {
        delete indexData[fieldId]
        return indexData
      }
      fieldIndex.df -= 1
    }
    if (fieldIndex.ds[documentId] <= 1) {
      delete fieldIndex.ds[documentId]
      return indexData
    }
    fieldIndex.ds[documentId] -= 1
    return { ...indexData, [fieldId]: fieldIndex }
  })
  if (Object.keys(self._index.get(term)).length === 0) {
    self._index.delete(term)
  }
}

const warnDocumentChanged = function (self, shortDocumentId, fieldId, term) {
  if (console == null || console.warn == null) { return }
  const fieldName = Object.entries(self._fieldIds).find(([name, id]) => id === fieldId)[0]
  console.warn(`MiniSearch: document with ID ${self._documentIds[shortDocumentId]} has changed before removal: term "${term}" was not present in field "${fieldName}". Removing a document after it has changed can corrupt the index!`)
}

const addDocumentId = function (self, documentId) {
  // Documents are referenced internally by a compact base-36 short ID
  const shortDocumentId = self._nextId.toString(36)
  self._documentIds[shortDocumentId] = documentId
  self._documentCount += 1
  self._nextId += 1
  return shortDocumentId
}

const addFields = function (self, fields) {
  fields.forEach((field, i) => { self._fieldIds[field] = i })
}
const termResults = function (self, term, boosts, boostDocument, indexData, weight = 1, editDistance = 0) {
  if (indexData == null) { return {} }
  return Object.entries(boosts).reduce((results, [field, boost]) => {
    const fieldId = self._fieldIds[field]
    const { df, ds } = indexData[fieldId] || { ds: {} }
    Object.entries(ds).forEach(([documentId, tf]) => {
      const docBoost = boostDocument ? boostDocument(self._documentIds[documentId], term) : 1
      if (!docBoost) { return }
      const normalizedLength = self._fieldLength[documentId][fieldId] / self._averageFieldLength[fieldId]
      results[documentId] = results[documentId] || { score: 0, match: {}, terms: [] }
      results[documentId].terms.push(term)
      results[documentId].match[term] = getOwnProperty(results[documentId].match, term) || []
      results[documentId].score += docBoost * score(tf, df, self._documentCount, normalizedLength, boost, editDistance)
      results[documentId].match[term].push(field)
    })
    return results
  }, {})
}

const getOwnProperty = function (object, property) {
  return Object.prototype.hasOwnProperty.call(object, property) ? object[property] : undefined
}

const addFieldLength = function (self, documentId, fieldId, count, length) {
  // Update the running average of this field's length across all documents
  self._averageFieldLength[fieldId] = self._averageFieldLength[fieldId] || 0
  const totalLength = (self._averageFieldLength[fieldId] * count) + length
  self._fieldLength[documentId] = self._fieldLength[documentId] || {}
  self._fieldLength[documentId][fieldId] = length
  self._averageFieldLength[fieldId] = totalLength / (count + 1)
}

const saveStoredFields = function (self, documentId, doc) {
  const { storeFields, extractField } = self._options
  if (storeFields == null || storeFields.length === 0) { return }
  self._storedFields[documentId] = self._storedFields[documentId] || {}
  storeFields.forEach((fieldName) => {
    const fieldValue = extractField(doc, fieldName)
    if (fieldValue === undefined) { return }
    self._storedFields[documentId][fieldName] = fieldValue
  })
}

const combinators = {
  [OR]: function (a, b) {
    return Object.entries(b).reduce((combined, [documentId, { score, match, terms }]) => {
      if (combined[documentId] == null) {
        combined[documentId] = { score, match, terms }
      } else {
        // Documents matching more than one term query get a combined, boosted score
        combined[documentId].score += score
        combined[documentId].score *= 1.5
        combined[documentId].terms = [...combined[documentId].terms, ...terms]
        Object.assign(combined[documentId].match, match)
      }
      return combined
    }, a || {})
  },
  [AND]: function (a, b) {
    if (a == null) { return b }
    return Object.entries(b).reduce((combined, [documentId, { score, match, terms }]) => {
      if (a[documentId] === undefined) { return combined }
      combined[documentId] = combined[documentId] || {}
      combined[documentId].score = a[documentId].score + score
      combined[documentId].match = { ...a[documentId].match, ...match }
      combined[documentId].terms = [...a[documentId].terms, ...terms]
      return combined
    }, {})
  }
}

const tfIdf = function (tf, df, n) {
  return tf * Math.log(n / df)
}

const score = function (termFrequency, documentFrequency, documentCount, normalizedLength, boost, editDistance) {
  const weight = boost / (1 + (0.333 * boost * editDistance))
  return weight * tfIdf(termFrequency, documentFrequency, documentCount) / normalizedLength
}
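
// Illustrative note (not part of the public API): as can be read from the two
// functions above, the score of a single term match is a field-length-normalized
// TF-IDF value, weighted by the field boost and the edit distance:
//
//   score = (boost / (1 + 0.333 * boost * editDistance))
//           * tf * ln(documentCount / df) / normalizedLength
//
// For example, with boost = 1, editDistance = 0, tf = 1, df = 1,
// documentCount = 3, and normalizedLength = 1, the score is ln(3) ≈ 1.0986.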
const termToQuery = (options) => (term, i, terms) => {
  const fuzzy = (typeof options.fuzzy === 'function') ? options.fuzzy(term, i, terms) : options.fuzzy
  const prefix = (typeof options.prefix === 'function') ? options.prefix(term, i, terms) : options.prefix
  return { term, fuzzy, prefix }
}

const uniq = function (array) {
  return array.filter((element, i, array) => array.indexOf(element) === i)
}

const isTruthy = (x) => !!x

const defaultOptions = {
  idField: 'id',
  extractField: (document, fieldName) => document[fieldName],
  tokenize: (string, _fieldName) => string.split(SPACE_OR_PUNCTUATION),
  processTerm: (term, _fieldName) => term.toLowerCase(),
  fields: undefined,
  searchOptions: undefined,
  storeFields: []
}

const defaultSearchOptions = {
  combineWith: OR,
  prefix: false,
  fuzzy: false,
  weights: {}
}

const defaultAutoSuggestOptions = {
  prefix: (term, i, terms) => i === terms.length - 1
}

// This regular expression matches any Unicode space or punctuation character
// Adapted from https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7BZ%7D%5Cp%7BP%7D&abb=on&c=on&esc=on
const SPACE_OR_PUNCTUATION = /[\n\r -#%-*,-/:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]+/u

export default MiniSearch
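
// Illustrative sketch (not part of the library): the default `tokenize` and
// `processTerm` above can be replaced at construction time, for example to make
// the index accent-insensitive. The normalization shown here is an assumption
// for the example, not a MiniSearch default.
//
//   const accentInsensitive = new MiniSearch({
//     fields: ['title', 'text'],
//     processTerm: (term) =>
//       term.normalize('NFD').replace(/[\u0300-\u036f]/g, '').toLowerCase()
//   })
//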