warn
* error
, debug
* @access protected
*/
log(msg, level = 'debug') {
const log = this.opt.log;
if (!this.opt.debug) {
return;
}
if (typeof log === 'object' && typeof log[level] === 'function') {
log[level](`mark.js: ${msg}`);
}
}
/**
* Escapes a string for usage within a regular expression
* @param {string} str - The string to escape
* @return {string}
* @access protected
*/
escapeStr(str) {
// eslint-disable-next-line no-useless-escape
return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&');
}
/**
* Creates a regular expression string to match the specified search
* term including synonyms, diacritics and accuracy if defined
* @param {string} str - The search term to be used
* @return {string}
* @access protected
*/
createRegExp(str) {
if (this.opt.wildcards !== 'disabled') {
str = this.setupWildcardsRegExp(str);
}
str = this.escapeStr(str);
if (Object.keys(this.opt.synonyms).length) {
str = this.createSynonymsRegExp(str);
}
if (this.opt.ignoreJoiners || this.opt.ignorePunctuation.length) {
str = this.setupIgnoreJoinersRegExp(str);
}
if (this.opt.diacritics) {
str = this.createDiacriticsRegExp(str);
}
str = this.createMergedBlanksRegExp(str);
if (this.opt.ignoreJoiners || this.opt.ignorePunctuation.length) {
str = this.createJoinersRegExp(str);
}
if (this.opt.wildcards !== 'disabled') {
str = this.createWildcardsRegExp(str);
}
str = this.createAccuracyRegExp(str);
return str;
}
/**
* Creates a regular expression string to match the defined synonyms
* @param {string} str - The search term to be used
* @return {string}
* @access protected
*/
createSynonymsRegExp(str) {
const syn = this.opt.synonyms,
sens = this.opt.caseSensitive ? '' : 'i',
// add replacement character placeholder before and after the
// synonym group
joinerPlaceholder = this.opt.ignoreJoiners ||
this.opt.ignorePunctuation.length ? '\u0000' : '';
for (let index in syn) {
if (syn.hasOwnProperty(index)) {
const value = syn[index],
k1 = this.opt.wildcards !== 'disabled' ?
this.setupWildcardsRegExp(index) :
this.escapeStr(index),
k2 = this.opt.wildcards !== 'disabled' ?
this.setupWildcardsRegExp(value) :
this.escapeStr(value);
if (k1 !== '' && k2 !== '') {
str = str.replace(
new RegExp(
`(${this.escapeStr(k1)}|${this.escapeStr(k2)})`,
`gm${sens}`
),
joinerPlaceholder +
`(${this.processSynomyms(k1)}|` +
`${this.processSynomyms(k2)})` +
joinerPlaceholder
);
}
}
}
return str;
}
/**
* Sets up synonyms to work with ignoreJoiners and/or ignorePunctuation
* @param {string} str - synonym key or value to process
* @return {string} - processed synonym string
*/
processSynomyms(str) {
if (this.opt.ignoreJoiners || this.opt.ignorePunctuation.length) {
str = this.setupIgnoreJoinersRegExp(str);
}
return str;
}
/**
* Sets up the regular expression string to allow later insertion of
* wildcard regular expression matches
* @param {string} str - The search term to be used
* @return {string}
* @access protected
*/
setupWildcardsRegExp(str) {
// replace single character wildcard with unicode 0001
str = str.replace(/(?:\\)*\?/g, val => {
return val.charAt(0) === '\\' ? '?' : '\u0001';
});
// replace multiple character wildcard with unicode 0002
return str.replace(/(?:\\)*\*/g, val => {
return val.charAt(0) === '\\' ? '*' : '\u0002';
});
}
/**
* Replaces the wildcard placeholder characters inserted by
* setupWildcardsRegExp with the corresponding regular expression patterns
* @param {string} str - The search term to be used
* @return {string}
* @access protected
*/
createWildcardsRegExp(str) {
// default to "enable" (i.e. to not include spaces)
// "withSpaces" uses `[\\S\\s]` instead of `.` because the latter
// does not match new line characters
let spaces = this.opt.wildcards === 'withSpaces';
return str
// replace unicode 0001 with a RegExp class to match any single
// character, or any single non-whitespace character depending
// on the setting
.replace(/\u0001/g, spaces ? '[\\S\\s]?' : '\\S?')
// replace unicode 0002 with a RegExp class to match zero or
// more characters, or zero or more non-whitespace characters
// depending on the setting
.replace(/\u0002/g, spaces ? '[\\S\\s]*?' : '\\S*');
}
/**
* Sets up the regular expression string to allow later insertion of
* designated characters (soft hyphens & zero width characters)
* @param {string} str - The search term to be used
* @return {string}
* @access protected
*/
setupIgnoreJoinersRegExp(str) {
// adding a "null" unicode character as it will not be modified by the
// other "create" regular expression functions
return str.replace(/[^(|)\\]/g, (val, indx, original) => {
// don't add a null after an opening "(", around a "|" or before
// a closing "(", or between an escapement (e.g. \+)
let nextChar = original.charAt(indx + 1);
if (/[(|)\\]/.test(nextChar) || nextChar === '') {
return val;
} else {
return val + '\u0000';
}
});
}
/**
* Creates a regular expression string to allow ignoring of designated
* characters (soft hyphens, zero width characters & punctuation) based on
* the specified option values of ignorePunctuation and ignoreJoiners
* @param {string} str - The search term to be used
* @return {string}
* @access protected
*/
createJoinersRegExp(str) {
let joiner = [];
const ignorePunctuation = this.opt.ignorePunctuation;
if (Array.isArray(ignorePunctuation) && ignorePunctuation.length) {
joiner.push(this.escapeStr(ignorePunctuation.join('')));
}
if (this.opt.ignoreJoiners) {
// u+00ad = soft hyphen
// u+200b = zero-width space
// u+200c = zero-width non-joiner
// u+200d = zero-width joiner
joiner.push('\\u00ad\\u200b\\u200c\\u200d');
}
return joiner.length ?
str.split(/\u0000+/).join(`[${joiner.join('')}]*`) :
str;
}
/**
* Creates a regular expression string to match diacritics
* @param {string} str - The search term to be used
* @return {string}
* @access protected
*/
createDiacriticsRegExp(str) {
const sens = this.opt.caseSensitive ? '' : 'i',
dct = this.opt.caseSensitive ? [
'aàáảãạăằắẳẵặâầấẩẫậäåāą', 'AÀÁẢÃẠĂẰẮẲẴẶÂẦẤẨẪẬÄÅĀĄ',
'cçćč', 'CÇĆČ', 'dđď', 'DĐĎ',
'eèéẻẽẹêềếểễệëěēę', 'EÈÉẺẼẸÊỀẾỂỄỆËĚĒĘ',
'iìíỉĩịîïī', 'IÌÍỈĨỊÎÏĪ', 'lł', 'LŁ', 'nñňń',
'NÑŇŃ', 'oòóỏõọôồốổỗộơởỡớờợöøō', 'OÒÓỎÕỌÔỒỐỔỖỘƠỞỠỚỜỢÖØŌ',
'rř', 'RŘ', 'sšśșş', 'SŠŚȘŞ',
'tťțţ', 'TŤȚŢ', 'uùúủũụưừứửữựûüůū', 'UÙÚỦŨỤƯỪỨỬỮỰÛÜŮŪ',
'yýỳỷỹỵÿ', 'YÝỲỶỸỴŸ', 'zžżź', 'ZŽŻŹ'
] : [
'aàáảãạăằắẳẵặâầấẩẫậäåāąAÀÁẢÃẠĂẰẮẲẴẶÂẦẤẨẪẬÄÅĀĄ', 'cçćčCÇĆČ',
'dđďDĐĎ', 'eèéẻẽẹêềếểễệëěēęEÈÉẺẼẸÊỀẾỂỄỆËĚĒĘ',
'iìíỉĩịîïīIÌÍỈĨỊÎÏĪ', 'lłLŁ', 'nñňńNÑŇŃ',
'oòóỏõọôồốổỗộơởỡớờợöøōOÒÓỎÕỌÔỒỐỔỖỘƠỞỠỚỜỢÖØŌ', 'rřRŘ',
'sšśșşSŠŚȘŞ', 'tťțţTŤȚŢ',
'uùúủũụưừứửữựûüůūUÙÚỦŨỤƯỪỨỬỮỰÛÜŮŪ', 'yýỳỷỹỵÿYÝỲỶỸỴŸ', 'zžżźZŽŻŹ'
];
let handled = [];
str.split('').forEach(ch => {
dct.every(dct => {
// Check if the character is inside a diacritics list
if (dct.indexOf(ch) !== -1) {
// Check if the related diacritics list was not
// handled yet
if (handled.indexOf(dct) > -1) {
return false;
}
// Make sure that the character OR any other
// character in the diacritics list will be matched
str = str.replace(
new RegExp(`[${dct}]`, `gm${sens}`), `[${dct}]`
);
handled.push(dct);
}
return true;
});
});
return str;
}
/**
* Creates a regular expression string that merges whitespace characters
* including subsequent ones into a single pattern, one or multiple
* whitespaces
* @param {string} str - The search term to be used
* @return {string}
* @access protected
*/
createMergedBlanksRegExp(str) {
return str.replace(/[\s]+/gmi, '[\\s]+');
}
/**
* Creates a regular expression string to match the specified string with
* the defined accuracy. As in the regular expression of "exactly" can be
* a group containing a blank at the beginning, all regular expressions will
* be created with two groups. The first group can be ignored (may contain
* the said blank), the second contains the actual match
* @param {string} str - The search term to be used
* @return {string}
* @access protected
*/
createAccuracyRegExp(str) {
const chars = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~¡¿';
let acc = this.opt.accuracy,
val = typeof acc === 'string' ? acc : acc.value,
ls = typeof acc === 'string' ? [] : acc.limiters,
lsJoin = '';
ls.forEach(limiter => {
lsJoin += `|${this.escapeStr(limiter)}`;
});
switch (val) {
case 'partially':
default:
return `()(${str})`;
case 'complementary':
lsJoin = '\\s' + (lsJoin ? lsJoin : this.escapeStr(chars));
return `()([^${lsJoin}]*${str}[^${lsJoin}]*)`;
case 'exactly':
return `(^|\\s${lsJoin})(${str})(?=$|\\s${lsJoin})`;
}
}
/**
* @typedef Mark~separatedKeywords
* @type {object.["-", ","]
*/
/**
* @typedef Mark~markAccuracySetting
* @type {string}
* @property {"partially"|"complementary"|"exactly"|Mark~markAccuracyObject}
* [accuracy="partially"] - Either one of the following string values:
* ["'"]
would match "Worlds", "World's" and
* "Wo'rlds"ignorePunctuation: ":;.,-–—‒_(){}[]!'\"+=".split(""),This * setting includes common punctuation as well as a minus, en-dash, * em-dash and figure-dash * ({@link https://en.wikipedia.org/wiki/Dash#Figure_dash ref}), as well * as an underscore.