/* eslint no-loop-func: 0 */ /** * Mnemonist SymSpell * =================== * * JavaScript implementation of the Symmetric Delete Spelling dictionary to * efficiently index & query expression based on edit distance. * Note that the current implementation target the v3.0 of the algorithm. * * [Reference]: * http://blog.faroo.com/2012/06/07/improved-edit-distance-based-spelling-correction/ * https://github.com/wolfgarbe/symspell * * [Author]: * Wolf Garbe */ var forEach = require('obliterator/foreach'); /** * Constants. */ var DEFAULT_MAX_DISTANCE = 2, DEFAULT_VERBOSITY = 2; var VERBOSITY = new Set([ // Returns only the top suggestion 0, // Returns suggestions with the smallest edit distance 1, // Returns every suggestion (no early termination) 2 ]); var VERBOSITY_EXPLANATIONS = { 0: 'Returns only the top suggestion', 1: 'Returns suggestions with the smallest edit distance', 2: 'Returns every suggestion (no early termination)' }; /** * Functions. */ /** * Function creating a dictionary item. * * @param {number} [value] - An optional suggestion. * @return {object} - The created item. */ function createDictionaryItem(value) { var suggestions = new Set(); if (typeof value === 'number') suggestions.add(value); return { suggestions, count: 0 }; } /** * Function creating a suggestion item. * * @return {object} - The created item. */ function createSuggestionItem(term, distance, count) { return { term: term || '', distance: distance || 0, count: count || 0 }; } /** * Simplified edit function. * * @param {string} word - Target word. * @param {number} distance - Distance. * @param {number} max - Max distance. * @param {Set} [deletes] - Set mutated to store deletes. */ function edits(word, distance, max, deletes) { deletes = deletes || new Set(); distance++; var deletedItem, l = word.length, i; if (l > 1) { for (i = 0; i < l; i++) { deletedItem = word.substring(0, i) + word.substring(i + 1); if (!deletes.has(deletedItem)) { deletes.add(deletedItem); if (distance < max) edits(deletedItem, distance, max, deletes); } } } return deletes; } /** * Function used to conditionally add suggestions. * * @param {array} words - Words list. * @param {number} verbosity - Verbosity level. * @param {object} item - The target item. * @param {string} suggestion - The target suggestion. * @param {number} int - Integer key of the word. * @param {object} deletedItem - Considered deleted item. * @param {SymSpell} */ function addLowestDistance(words, verbosity, item, suggestion, int, deletedItem) { var first = item.suggestions.values().next().value; if (verbosity < 2 && item.suggestions.size > 0 && words[first].length - deletedItem.length > suggestion.length - deletedItem.length) { item.suggestions = new Set(); item.count = 0; } if (verbosity === 2 || !item.suggestions.size || words[first].length - deletedItem.length >= suggestion.length - deletedItem.length) { item.suggestions.add(int); } } /** * Custom Damerau-Levenshtein used by the algorithm. * * @param {string} source - First string. * @param {string} target - Second string. * @return {number} - The distance. */ function damerauLevenshtein(source, target) { var m = source.length, n = target.length, H = [[]], INF = m + n, sd = new Map(), i, l, j; H[0][0] = INF; for (i = 0; i <= m; i++) { if (!H[i + 1]) H[i + 1] = []; H[i + 1][1] = i; H[i + 1][0] = INF; } for (j = 0; j <= n; j++) { H[1][j + 1] = j; H[0][j + 1] = INF; } var st = source + target, letter; for (i = 0, l = st.length; i < l; i++) { letter = st[i]; if (!sd.has(letter)) sd.set(letter, 0); } // Iterating for (i = 1; i <= m; i++) { var DB = 0; for (j = 1; j <= n; j++) { var i1 = sd.get(target[j - 1]), j1 = DB; if (source[i - 1] === target[j - 1]) { H[i + 1][j + 1] = H[i][j]; DB = j; } else { H[i + 1][j + 1] = Math.min( H[i][j], H[i + 1][j], H[i][j + 1] ) + 1; } H[i + 1][j + 1] = Math.min( H[i + 1][j + 1], H[i1][j1] + (i - i1 - 1) + 1 + (j - j1 - 1) ); } sd.set(source[i - 1], i); } return H[m + 1][n + 1]; } /** * Lookup function. * * @param {object} dictionary - A SymSpell dictionary. * @param {array} words - Unique words list. * @param {number} verbosity - Verbosity level. * @param {number} maxDistance - Maximum distance. * @param {number} maxLength - Maximum word length in the dictionary. * @param {string} input - Input string. * @return {array} - The list of suggestions. */ function lookup(dictionary, words, verbosity, maxDistance, maxLength, input) { var length = input.length; if (length - maxDistance > maxLength) return []; var candidates = [input], candidateSet = new Set(), suggestionSet = new Set(); var suggestions = [], candidate, item; // Exhausting every candidates while (candidates.length > 0) { candidate = candidates.shift(); // Early termination if ( verbosity < 2 && suggestions.length > 0 && length - candidate.length > suggestions[0].distance ) break; item = dictionary[candidate]; if (item !== undefined) { if (typeof item === 'number') item = createDictionaryItem(item); if (item.count > 0 && !suggestionSet.has(candidate)) { suggestionSet.add(candidate); var suggestItem = createSuggestionItem( candidate, length - candidate.length, item.count ); suggestions.push(suggestItem); // Another early termination if (verbosity < 2 && length - candidate.length === 0) break; } // Iterating over the item's suggestions item.suggestions.forEach(index => { var suggestion = words[index]; // Do we already have this suggestion? if (suggestionSet.has(suggestion)) return; suggestionSet.add(suggestion); // Computing distance between candidate & suggestion var distance = 0; if (input !== suggestion) { if (suggestion.length === candidate.length) { distance = length - candidate.length; } else if (length === candidate.length) { distance = suggestion.length - candidate.length; } else { var ii = 0, jj = 0; var l = suggestion.length; while ( ii < l && ii < length && suggestion[ii] === input[ii] ) { ii++; } while ( jj < l - ii && jj < length && suggestion[l - jj - 1] === input[length - jj - 1] ) { jj++; } if (ii > 0 || jj > 0) { distance = damerauLevenshtein( suggestion.substr(ii, l - ii - jj), input.substr(ii, length - ii - jj) ); } else { distance = damerauLevenshtein(suggestion, input); } } } // Removing suggestions of higher distance if (verbosity < 2 && suggestions.length > 0 && suggestions[0].distance > distance) { suggestions = []; } if (verbosity < 2 && suggestions.length > 0 && distance > suggestions[0].distance) { return; } if (distance <= maxDistance) { var target = dictionary[suggestion]; if (target !== undefined) { suggestions.push(createSuggestionItem( suggestion, distance, target.count )); } } }); } // Adding edits if (length - candidate.length < maxDistance) { if (verbosity < 2 && suggestions.length > 0 && length - candidate.length >= suggestions[0].distance) continue; for (var i = 0, l = candidate.length; i < l; i++) { var deletedItem = ( candidate.substring(0, i) + candidate.substring(i + 1) ); if (!candidateSet.has(deletedItem)) { candidateSet.add(deletedItem); candidates.push(deletedItem); } } } } if (verbosity === 0) return suggestions.slice(0, 1); return suggestions; } /** * SymSpell. * * @constructor */ function SymSpell(options) { options = options || {}; this.clear(); // Properties this.maxDistance = typeof options.maxDistance === 'number' ? options.maxDistance : DEFAULT_MAX_DISTANCE; this.verbosity = typeof options.verbosity === 'number' ? options.verbosity : DEFAULT_VERBOSITY; // Sanity checks if (typeof this.maxDistance !== 'number' || this.maxDistance <= 0) throw Error('mnemonist/SymSpell.constructor: invalid `maxDistance` option. Should be a integer greater than 0.'); if (!VERBOSITY.has(this.verbosity)) throw Error('mnemonist/SymSpell.constructor: invalid `verbosity` option. Should be either 0, 1 or 2.'); } /** * Method used to clear the structure. * * @return {undefined} */ SymSpell.prototype.clear = function() { // Properties this.size = 0; this.dictionary = Object.create(null); this.maxLength = 0; this.words = []; }; /** * Method used to add a word to the index. * * @param {string} word - Word to add. * @param {SymSpell} */ SymSpell.prototype.add = function(word) { var item = this.dictionary[word]; if (item !== undefined) { if (typeof item === 'number') { item = createDictionaryItem(item); this.dictionary[word] = item; } item.count++; } else { item = createDictionaryItem(); item.count++; this.dictionary[word] = item; if (word.length > this.maxLength) this.maxLength = word.length; } if (item.count === 1) { var number = this.words.length; this.words.push(word); var deletes = edits(word, 0, this.maxDistance); deletes.forEach(deletedItem => { var target = this.dictionary[deletedItem]; if (target !== undefined) { if (typeof target === 'number') { target = createDictionaryItem(target); this.dictionary[deletedItem] = target; } if (!target.suggestions.has(number)) { addLowestDistance( this.words, this.verbosity, target, word, number, deletedItem ); } } else { this.dictionary[deletedItem] = number; } }); } this.size++; return this; }; /** * Method used to search the index. * * @param {string} input - Input query. * @return {array} - The found suggestions. */ SymSpell.prototype.search = function(input) { return lookup( this.dictionary, this.words, this.verbosity, this.maxDistance, this.maxLength, input ); }; /** * Convenience known methods. */ SymSpell.prototype.inspect = function() { var array = []; array.size = this.size; array.maxDistance = this.maxDistance; array.verbosity = this.verbosity; array.behavior = VERBOSITY_EXPLANATIONS[this.verbosity]; for (var k in this.dictionary) { if (typeof this.dictionary[k] === 'object' && this.dictionary[k].count) array.push([k, this.dictionary[k].count]); } // Trick so that node displays the name of the constructor Object.defineProperty(array, 'constructor', { value: SymSpell, enumerable: false }); return array; }; if (typeof Symbol !== 'undefined') SymSpell.prototype[Symbol.for('nodejs.util.inspect.custom')] = SymSpell.prototype.inspect; /** * Static @.from function taking an arbitrary iterable & converting it into * a structure. * * @param {Iterable} iterable - Target iterable. * @return {SymSpell} */ SymSpell.from = function(iterable, options) { var index = new SymSpell(options); forEach(iterable, function(value) { index.add(value); }); return index; }; /** * Exporting. */ module.exports = SymSpell;