var conventions = require("./conventions"); var dom = require('./dom') var entities = require('./entities'); var sax = require('./sax'); var DOMImplementation = dom.DOMImplementation; var NAMESPACE = conventions.NAMESPACE; var ParseError = sax.ParseError; var XMLReader = sax.XMLReader; /** * Normalizes line ending according to https://www.w3.org/TR/xml11/#sec-line-ends: * * > XML parsed entities are often stored in computer files which, * > for editing convenience, are organized into lines. * > These lines are typically separated by some combination * > of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA). * > * > To simplify the tasks of applications, the XML processor must behave * > as if it normalized all line breaks in external parsed entities (including the document entity) * > on input, before parsing, by translating all of the following to a single #xA character: * > * > 1. the two-character sequence #xD #xA * > 2. the two-character sequence #xD #x85 * > 3. the single character #x85 * > 4. the single character #x2028 * > 5. any #xD character that is not immediately followed by #xA or #x85. * * @param {string} input * @returns {string} */ function normalizeLineEndings(input) { return input .replace(/\r[\n\u0085]/g, '\n') .replace(/[\r\u0085\u2028]/g, '\n') } /** * @typedef Locator * @property {number} [columnNumber] * @property {number} [lineNumber] */ /** * @typedef DOMParserOptions * @property {DOMHandler} [domBuilder] * @property {Function} [errorHandler] * @property {(string) => string} [normalizeLineEndings] used to replace line endings before parsing * defaults to `normalizeLineEndings` * @property {Locator} [locator] * @property {Record} [xmlns] * * @see normalizeLineEndings */ /** * The DOMParser interface provides the ability to parse XML or HTML source code * from a string into a DOM `Document`. * * _xmldom is different from the spec in that it allows an `options` parameter, * to override the default behavior._ * * @param {DOMParserOptions} [options] * @constructor * * @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser * @see https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization */ function DOMParser(options){ this.options = options ||{locator:{}}; } DOMParser.prototype.parseFromString = function(source,mimeType){ var options = this.options; var sax = new XMLReader(); var domBuilder = options.domBuilder || new DOMHandler();//contentHandler and LexicalHandler var errorHandler = options.errorHandler; var locator = options.locator; var defaultNSMap = options.xmlns||{}; var isHTML = /\/x?html?$/.test(mimeType);//mimeType.toLowerCase().indexOf('html') > -1; var entityMap = isHTML ? entities.HTML_ENTITIES : entities.XML_ENTITIES; if(locator){ domBuilder.setDocumentLocator(locator) } sax.errorHandler = buildErrorHandler(errorHandler,domBuilder,locator); sax.domBuilder = options.domBuilder || domBuilder; if(isHTML){ defaultNSMap[''] = NAMESPACE.HTML; } defaultNSMap.xml = defaultNSMap.xml || NAMESPACE.XML; var normalize = options.normalizeLineEndings || normalizeLineEndings; if (source && typeof source === 'string') { sax.parse( normalize(source), defaultNSMap, entityMap ) } else { sax.errorHandler.error('invalid doc source') } return domBuilder.doc; } function buildErrorHandler(errorImpl,domBuilder,locator){ if(!errorImpl){ if(domBuilder instanceof DOMHandler){ return domBuilder; } errorImpl = domBuilder ; } var errorHandler = {} var isCallback = errorImpl instanceof Function; locator = locator||{} function build(key){ var fn = errorImpl[key]; if(!fn && isCallback){ fn = errorImpl.length == 2?function(msg){errorImpl(key,msg)}:errorImpl; } errorHandler[key] = fn && function(msg){ fn('[xmldom '+key+']\t'+msg+_locator(locator)); }||function(){}; } build('warning'); build('error'); build('fatalError'); return errorHandler; } //console.log('#\n\n\n\n\n\n\n####') /** * +ContentHandler+ErrorHandler * +LexicalHandler+EntityResolver2 * -DeclHandler-DTDHandler * * DefaultHandler:EntityResolver, DTDHandler, ContentHandler, ErrorHandler * DefaultHandler2:DefaultHandler,LexicalHandler, DeclHandler, EntityResolver2 * @link http://www.saxproject.org/apidoc/org/xml/sax/helpers/DefaultHandler.html */ function DOMHandler() { this.cdata = false; } function position(locator,node){ node.lineNumber = locator.lineNumber; node.columnNumber = locator.columnNumber; } /** * @see org.xml.sax.ContentHandler#startDocument * @link http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html */ DOMHandler.prototype = { startDocument : function() { this.doc = new DOMImplementation().createDocument(null, null, null); if (this.locator) { this.doc.documentURI = this.locator.systemId; } }, startElement:function(namespaceURI, localName, qName, attrs) { var doc = this.doc; var el = doc.createElementNS(namespaceURI, qName||localName); var len = attrs.length; appendElement(this, el); this.currentElement = el; this.locator && position(this.locator,el) for (var i = 0 ; i < len; i++) { var namespaceURI = attrs.getURI(i); var value = attrs.getValue(i); var qName = attrs.getQName(i); var attr = doc.createAttributeNS(namespaceURI, qName); this.locator &&position(attrs.getLocator(i),attr); attr.value = attr.nodeValue = value; el.setAttributeNode(attr) } }, endElement:function(namespaceURI, localName, qName) { var current = this.currentElement var tagName = current.tagName; this.currentElement = current.parentNode; }, startPrefixMapping:function(prefix, uri) { }, endPrefixMapping:function(prefix) { }, processingInstruction:function(target, data) { var ins = this.doc.createProcessingInstruction(target, data); this.locator && position(this.locator,ins) appendElement(this, ins); }, ignorableWhitespace:function(ch, start, length) { }, characters:function(chars, start, length) { chars = _toString.apply(this,arguments) //console.log(chars) if(chars){ if (this.cdata) { var charNode = this.doc.createCDATASection(chars); } else { var charNode = this.doc.createTextNode(chars); } if(this.currentElement){ this.currentElement.appendChild(charNode); }else if(/^\s*$/.test(chars)){ this.doc.appendChild(charNode); //process xml } this.locator && position(this.locator,charNode) } }, skippedEntity:function(name) { }, endDocument:function() { this.doc.normalize(); }, setDocumentLocator:function (locator) { if(this.locator = locator){// && !('lineNumber' in locator)){ locator.lineNumber = 0; } }, //LexicalHandler comment:function(chars, start, length) { chars = _toString.apply(this,arguments) var comm = this.doc.createComment(chars); this.locator && position(this.locator,comm) appendElement(this, comm); }, startCDATA:function() { //used in characters() methods this.cdata = true; }, endCDATA:function() { this.cdata = false; }, startDTD:function(name, publicId, systemId) { var impl = this.doc.implementation; if (impl && impl.createDocumentType) { var dt = impl.createDocumentType(name, publicId, systemId); this.locator && position(this.locator,dt) appendElement(this, dt); this.doc.doctype = dt; } }, /** * @see org.xml.sax.ErrorHandler * @link http://www.saxproject.org/apidoc/org/xml/sax/ErrorHandler.html */ warning:function(error) { console.warn('[xmldom warning]\t'+error,_locator(this.locator)); }, error:function(error) { console.error('[xmldom error]\t'+error,_locator(this.locator)); }, fatalError:function(error) { throw new ParseError(error, this.locator); } } function _locator(l){ if(l){ return '\n@'+(l.systemId ||'')+'#[line:'+l.lineNumber+',col:'+l.columnNumber+']' } } function _toString(chars,start,length){ if(typeof chars == 'string'){ return chars.substr(start,length) }else{//java sax connect width xmldom on rhino(what about: "? && !(chars instanceof String)") if(chars.length >= start+length || start){ return new java.lang.String(chars,start,length)+''; } return chars; } } /* * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/LexicalHandler.html * used method of org.xml.sax.ext.LexicalHandler: * #comment(chars, start, length) * #startCDATA() * #endCDATA() * #startDTD(name, publicId, systemId) * * * IGNORED method of org.xml.sax.ext.LexicalHandler: * #endDTD() * #startEntity(name) * #endEntity(name) * * * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/DeclHandler.html * IGNORED method of org.xml.sax.ext.DeclHandler * #attributeDecl(eName, aName, type, mode, value) * #elementDecl(name, model) * #externalEntityDecl(name, publicId, systemId) * #internalEntityDecl(name, value) * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/EntityResolver2.html * IGNORED method of org.xml.sax.EntityResolver2 * #resolveEntity(String name,String publicId,String baseURI,String systemId) * #resolveEntity(publicId, systemId) * #getExternalSubset(name, baseURI) * @link http://www.saxproject.org/apidoc/org/xml/sax/DTDHandler.html * IGNORED method of org.xml.sax.DTDHandler * #notationDecl(name, publicId, systemId) {}; * #unparsedEntityDecl(name, publicId, systemId, notationName) {}; */ "endDTD,startEntity,endEntity,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,resolveEntity,getExternalSubset,notationDecl,unparsedEntityDecl".replace(/\w+/g,function(key){ DOMHandler.prototype[key] = function(){return null} }) /* Private static helpers treated below as private instance methods, so don't need to add these to the public API; we might use a Relator to also get rid of non-standard public properties */ function appendElement (hander,node) { if (!hander.currentElement) { hander.doc.appendChild(node); } else { hander.currentElement.appendChild(node); } }//appendChild and setAttributeNS are preformance key exports.__DOMHandler = DOMHandler; exports.normalizeLineEndings = normalizeLineEndings; exports.DOMParser = DOMParser;