import { Tokenizer } from './Tokenizer.js';
import { defaults } from './defaults.js';
import { block, inline } from './rules.js';
import { repeatString } from './helpers.js';

/**
 * smartypants text replacement
 *
 * Replaces ASCII dashes, straight quotes and three consecutive dots with
 * their typographic Unicode counterparts. The order of the replacements
 * matters: `---` must be handled before `--`, and opening quotes before the
 * catch-all closing-quote replacement.
 * @param {string} text
 * @returns {string} The text with typographic punctuation substituted.
 */
function smartypants(text) {
  return text
    // em-dashes
    .replace(/---/g, '\u2014')
    // en-dashes
    .replace(/--/g, '\u2013')
    // opening singles
    .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018')
    // closing singles & apostrophes
    .replace(/'/g, '\u2019')
    // opening doubles
    .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c')
    // closing doubles
    .replace(/"/g, '\u201d')
    // ellipses
    .replace(/\.{3}/g, '\u2026');
}

/**
 * mangle email addresses
 *
 * Encodes every UTF-16 code unit of `text` as an HTML numeric character
 * reference, choosing at random (per character) between the decimal form
 * `&#NN;` and the hexadecimal form `&#xHH;`. The output is therefore not
 * deterministic.
 * @param {string} text
 * @returns {string} The entity-encoded text.
 */
function mangle(text) {
  let out = '';
  const l = text.length;

  for (let i = 0; i < l; i++) {
    let ch = text.charCodeAt(i);
    if (Math.random() > 0.5) {
      ch = 'x' + ch.toString(16);
    }
    out += '&#' + ch + ';';
  }

  return out;
}

/**
 * Block Lexer
 *
 * Splits a source string into block-level tokens and then resolves the
 * queued inline content of those tokens into inline tokens.
 */
export class Lexer {
  /**
   * Creates a lexer and wires it to its tokenizer.
   *
   * Note that the options object is not copied: when `options` is omitted the
   * shared `defaults` object is used, and in both cases a `tokenizer` is
   * stored on the options object if it has none. The tokenizer in turn gets
   * its `options`, `lexer` and `rules` properties assigned here.
   * @param {object} [options] Lexer options; falls back to `defaults`.
   */
  constructor(options) {
    this.tokens = [];
    // Reference-link definitions collected by the `def` rule, keyed by tag.
    this.tokens.links = Object.create(null);
    this.options = options || defaults;
    this.options.tokenizer = this.options.tokenizer || new Tokenizer();
    this.tokenizer = this.options.tokenizer;
    this.tokenizer.options = this.options;
    this.tokenizer.lexer = this;
    // Pending `{ src, tokens }` jobs that `lex` later feeds to `inlineTokens`.
    this.inlineQueue = [];
    this.state = {
      inLink: false,
      inRawBlock: false,
      top: true
    };

    const rules = {
      block: block.normal,
      inline: inline.normal
    };

    // `pedantic` takes precedence over `gfm`; `breaks` only applies with `gfm`.
    if (this.options.pedantic) {
      rules.block = block.pedantic;
      rules.inline = inline.pedantic;
    } else if (this.options.gfm) {
      rules.block = block.gfm;
      if (this.options.breaks) {
        rules.inline = inline.breaks;
      } else {
        rules.inline = inline.gfm;
      }
    }
    this.tokenizer.rules = rules;
  }

  /**
   * Expose Rules
   * @returns {{block: object, inline: object}} The imported rule sets.
   */
  static get rules() {
    return {
      block,
      inline
    };
  }

  /**
   * Static Lex Method
   *
   * Convenience wrapper: builds a lexer with `options` and runs `lex` on `src`.
   * @param {string} src
   * @param {object} [options]
   * @returns {Array} The token list produced by `lex`.
   */
  static lex(src, options) {
    const lexer = new Lexer(options);
    return lexer.lex(src);
  }

  /**
   * Static Lex Inline Method
   *
   * Convenience wrapper: builds a lexer with `options` and runs
   * `inlineTokens` on `src`.
   * @param {string} src
   * @param {object} [options]
   * @returns {Array} The inline token list.
   */
  static lexInline(src, options) {
    const lexer = new Lexer(options);
    return lexer.inlineTokens(src);
  }

  /**
   * Preprocessing
   *
   * Normalizes line endings to `\n`, runs block tokenization into
   * `this.tokens`, then drains the inline queue in FIFO order.
   * @param {string} src
   * @returns {Array} `this.tokens` (which also carries the `links` map).
   */
  lex(src) {
    src = src
      .replace(/\r\n|\r/g, '\n');

    this.blockTokens(src, this.tokens);

    let next;
    while ((next = this.inlineQueue.shift())) {
      this.inlineTokens(next.src, next.tokens);
    }

    return this.tokens;
  }

  /**
   * Lexing
   *
   * Consumes `src` from the front, trying extension tokenizers first and then
   * each built-in block rule in a fixed order. Every tokenizer call returns
   * either a falsy value or a token whose `raw` is the consumed prefix.
   * @param {string} src Source text to tokenize.
   * @param {Array} [tokens] Array the tokens are appended to.
   * @returns {Array} The `tokens` array.
   * @throws {Error} When no rule consumes the remaining input and
   *   `options.silent` is falsy (with `silent` the error is logged instead).
   */
  blockTokens(src, tokens = []) {
    // NOTE(review): each tab is replaced by a single space in both branches
    // below; confirm this width is intended and not a formatting artifact.
    if (this.options.pedantic) {
      src = src.replace(/\t/g, ' ').replace(/^ +$/gm, '');
    } else {
      src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
        return leading + ' '.repeat(tabs.length);
      });
    }

    let token, lastToken, cutSrc, lastParagraphClipped;

    while (src) {
      if (this.options.extensions
        && this.options.extensions.block
        && this.options.extensions.block.some((extTokenizer) => {
          if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }

      // newline
      if (token = this.tokenizer.space(src)) {
        src = src.substring(token.raw.length);
        if (token.raw.length === 1 && tokens.length > 0) {
          // if there's a single \n as a spacer, it's terminating the last line,
          // so move it there so that we don't get unnecessary paragraph tags
          tokens[tokens.length - 1].raw += '\n';
        } else {
          tokens.push(token);
        }
        continue;
      }

      // code
      if (token = this.tokenizer.code(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        // An indented code block cannot interrupt a paragraph.
        if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          // Keep the queued inline job of the extended token in sync.
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // fences
      if (token = this.tokenizer.fences(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // heading
      if (token = this.tokenizer.heading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // hr
      if (token = this.tokenizer.hr(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // blockquote
      if (token = this.tokenizer.blockquote(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // list
      if (token = this.tokenizer.list(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // html
      if (token = this.tokenizer.html(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // def
      if (token = this.tokenizer.def(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
          // Directly after a paragraph/text token the definition is treated
          // as plain continuation text (its raw source is appended).
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.raw;
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else if (!this.tokens.links[token.tag]) {
          // First definition of a tag wins; no token is emitted for it.
          this.tokens.links[token.tag] = {
            href: token.href,
            title: token.title
          };
        }
        continue;
      }

      // table (gfm)
      if (token = this.tokenizer.table(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // lheading
      if (token = this.tokenizer.lheading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // top-level paragraph
      // prevent paragraph consuming extensions by clipping 'src' to extension start
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startBlock) {
        let startIndex = Infinity;
        // Search from the second character so a start at position 0 cannot
        // clip the paragraph to nothing; hence the `+ 1` below.
        const tempSrc = src.slice(1);
        let tempStart;
        // Arrow function so `this` is the lexer; a plain `function` callback
        // would run with `this === undefined` and pass `{ lexer: undefined }`.
        this.options.extensions.startBlock.forEach((getStartIndex) => {
          tempStart = getStartIndex.call({ lexer: this }, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) {
            startIndex = Math.min(startIndex, tempStart);
          }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
      if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) {
        lastToken = tokens[tokens.length - 1];
        if (lastParagraphClipped && lastToken && lastToken.type === 'paragraph') {
          // The previous paragraph was cut short at a possible extension
          // start that did not match; merge this piece back into it.
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          // Drop the newest inline job (presumably queued by the tokenizer
          // for this fragment) and update the merged paragraph's job.
          this.inlineQueue.pop();
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        lastParagraphClipped = (cutSrc.length !== src.length);
        src = src.substring(token.raw.length);
        continue;
      }

      // text
      if (token = this.tokenizer.text(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          this.inlineQueue.pop();
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // Nothing consumed any input: report instead of looping forever.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    this.state.top = true;
    return tokens;
  }

  /**
   * Queues `src` for later inline tokenization into `tokens`.
   *
   * The work is deferred: `lex` processes the queue after block tokenization.
   * @param {string} src
   * @param {Array} [tokens] Array that will receive the inline tokens.
   * @returns {Array} The same `tokens` array (still unfilled at this point).
   */
  inline(src, tokens = []) {
    this.inlineQueue.push({ src, tokens });
    return tokens;
  }

  /**
   * Lexing/Compiling
   *
   * Consumes `src` from the front, trying extension tokenizers first and then
   * each built-in inline rule in a fixed order.
   * @param {string} src Inline source text.
   * @param {Array} [tokens] Array the tokens are appended to.
   * @returns {Array} The `tokens` array.
   * @throws {Error} When no rule consumes the remaining input and
   *   `options.silent` is falsy (with `silent` the error is logged instead).
   */
  inlineTokens(src, tokens = []) {
    let token, lastToken, cutSrc;

    // String with links masked to avoid interference with em and strong
    let maskedSrc = src;
    let match;
    let keepPrevChar, prevChar;

    // Mask out reflinks
    if (this.tokens.links) {
      const links = Object.keys(this.tokens.links);
      if (links.length > 0) {
        while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
          // Only mask matches whose last bracketed label is a known link tag.
          if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
            // Same-length replacement, so indexes into `maskedSrc` stay aligned.
            maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
          }
        }
      }
    }
    // Mask out other blocks
    while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
    }

    // Mask out escaped em & strong delimiters
    while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) {
      // Overwrite the last two characters of the match with '++'.
      maskedSrc = maskedSrc.slice(0, match.index + match[0].length - 2) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex);
      // Step back one position before the next search.
      this.tokenizer.rules.inline.escapedEmSt.lastIndex--;
    }

    while (src) {
      // `prevChar` is only carried over from an immediately preceding
      // inlineText match; any other token resets it.
      if (!keepPrevChar) {
        prevChar = '';
      }
      keepPrevChar = false;

      // extensions
      if (this.options.extensions
        && this.options.extensions.inline
        && this.options.extensions.inline.some((extTokenizer) => {
          if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }

      // escape
      if (token = this.tokenizer.escape(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // tag
      if (token = this.tokenizer.tag(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        // Adjacent text tokens are merged into one.
        if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // link
      if (token = this.tokenizer.link(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // reflink, nolink
      if (token = this.tokenizer.reflink(src, this.tokens.links)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // em & strong
      if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // code
      if (token = this.tokenizer.codespan(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // br
      if (token = this.tokenizer.br(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // del (gfm)
      if (token = this.tokenizer.del(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // autolink
      if (token = this.tokenizer.autolink(src, mangle)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // url (gfm)
      if (!this.state.inLink && (token = this.tokenizer.url(src, mangle))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text
      // prevent inlineText consuming extensions by clipping 'src' to extension start
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startInline) {
        let startIndex = Infinity;
        // Search from the second character so a start at position 0 cannot
        // clip the text to nothing; hence the `+ 1` below.
        const tempSrc = src.slice(1);
        let tempStart;
        // Arrow function so `this` is the lexer; a plain `function` callback
        // would run with `this === undefined` and pass `{ lexer: undefined }`.
        this.options.extensions.startInline.forEach((getStartIndex) => {
          tempStart = getStartIndex.call({ lexer: this }, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) {
            startIndex = Math.min(startIndex, tempStart);
          }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
      if (token = this.tokenizer.inlineText(cutSrc, smartypants)) {
        src = src.substring(token.raw.length);
        if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started
          prevChar = token.raw.slice(-1);
        }
        keepPrevChar = true;
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // Nothing consumed any input: report instead of looping forever.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }
}