import _classCallCheck from "@babel/runtime/helpers/classCallCheck";
import _createClass from "@babel/runtime/helpers/createClass";
import _defineProperty from "@babel/runtime/helpers/defineProperty";
/**
 * RDF/XML PARSER
 *
 * Parser believed to be in full positive RDF/XML parsing compliance
 * with the possible exception of handling deprecated RDF attributes
 * appropriately. Parser is believed to comply fully with other W3C
 * and industry standards where appropriate (DOM, ECMAScript, &c.)
 *
 * Author: David Sheets
 *
 * W3C® SOFTWARE NOTICE AND LICENSE
 * http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
 * This work (and included software, documentation such as READMEs, or
 * other related items) is being provided by the copyright holders under
 * the following license. By obtaining, using and/or copying this work,
 * you (the licensee) agree that you have read, understood, and will
 * comply with the following terms and conditions.
 *
 * Permission to copy, modify, and distribute this software and its
 * documentation, with or without modification, for any purpose and
 * without fee or royalty is hereby granted, provided that you include
 * the following on ALL copies of the software and documentation or
 * portions thereof, including modifications:
 *
 * 1. The full text of this NOTICE in a location viewable to users of
 * the redistributed or derivative work.
 * 2. Any pre-existing intellectual property disclaimers, notices, or terms and
 * conditions. If none exist, the W3C Software Short Notice should be
 * included (hypertext is preferred, text is permitted) within the body
 * of any redistributed or derivative code.
 * 3. Notice of any changes or modifications to the files, including the
 * date changes were made. (We recommend you provide URIs to the location
 * from which the code is derived.)
 *
 * THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT
 * HOLDERS MAKE NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY OR FITNESS
 * FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE OR
 * DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS,
 * TRADEMARKS OR OTHER RIGHTS.
 *
 * COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL
 * OR CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR
 * DOCUMENTATION.
 *
 * The name and trademarks of copyright holders may NOT be used in
 * advertising or publicity pertaining to the software without specific,
 * written prior permission. Title to copyright in this software and any
 * associated documentation will at all times remain with copyright
 * holders.
 */
/**
 * @class RDFParser resource object tied to an RDFStore
 *
 * @author David Sheets
 *
 */
import * as uriUtil from './uri';
var RDFParser = /*#__PURE__*/function () {
  /*
   * @constructor
   * @param {RDFStore} store An RDFStore object
   */
  function RDFParser(store) {
    _classCallCheck(this, RDFParser);
    /** Our triple store reference @private */
    this.store = store;
    /** Our identified blank nodes @private */
    this.bnodes = {};
    /** A context for context-aware stores @private */
    this.why = null;
    /** Reification flag */
    this.reify = false;
  }

  /** Standard namespaces that we know how to handle @final
   *  @member RDFParser
   */
  return _createClass(RDFParser, [{
    key: "frameFactory",
    value:
    /**
     * Frame class for namespace and base URI lookups
     * Base lookups will always resolve because the parser knows
     * the default base.
     *
     * Each frame records one level of the walk over the DOM: the element
     * being visited, its inherited base/lang, and the RDF term (`node`)
     * built for it, tagged as either a NODE or an ARC (`nodeType`).
     *
     * @param {RDFParser} parser The parser that owns the frame
     * @param {Object} [parent] The enclosing frame, if any
     * @param {Node} [element] The DOM node this frame stands for, if any
     * @returns {Object} A fresh frame object
     * @private
     */
    function frameFactory(parser, parent, element) {
      return {
        'NODE': 1,
        'ARC': 2,
        'parent': parent,
        'parser': parser,
        'store': parser.store,
        'element': element,
        // index of the next child of `element` to visit
        'lastChild': 0,
        'base': null,
        'lang': null,
        'node': null,
        'nodeType': null,
        // next ordinal handed out when an rdf:li child is renamed to rdf:_n
        'listIndex': 1,
        'rdfid': null,
        'datatype': null,
        'collection': false,
        /** Terminate the frame and notify the store that we're done */
        'terminateFrame': function terminateFrame() {
          if (this.collection) {
            this.node.close();
          }
        },
        /** Add a symbol of a certain type to the this frame */
        'addSymbol': function addSymbol(type, uri) {
          // resolve against this frame's base before asking the store for a symbol
          uri = uriUtil.join(uri, this.base);
          this.node = this.store.sym(uri);
          this.nodeType = type;
        },
        /** Load any constructed triples into the store */
        'loadTriple': function loadTriple() {
          if (this.parent.parent.collection) {
            // inside a collection the object is appended to the collection node
            this.parent.parent.node.append(this.node);
          } else {
            this.store.add(this.parent.parent.node, this.parent.node, this.node, this.parser.why);
          }
          if (this.parent.rdfid != null) {
            // reify
            var triple = this.store.sym(uriUtil.join('#' + this.parent.rdfid, this.base));
            this.store.add(triple, this.store.sym(RDFParser.ns.RDF + 'type'), this.store.sym(RDFParser.ns.RDF + 'Statement'), this.parser.why);
            this.store.add(triple, this.store.sym(RDFParser.ns.RDF + 'subject'), this.parent.parent.node, this.parser.why);
            this.store.add(triple, this.store.sym(RDFParser.ns.RDF + 'predicate'), this.parent.node, this.parser.why);
            this.store.add(triple, this.store.sym(RDFParser.ns.RDF + 'object'), this.node, this.parser.why);
          }
        },
        /** Check if it's OK to load a triple */
        'isTripleToLoad': function isTripleToLoad() {
          // requires a NODE <- ARC <- NODE chain: grandparent, parent, this frame
          return this.parent != null && this.parent.parent != null && this.nodeType === this.NODE && this.parent.nodeType === this.ARC && this.parent.parent.nodeType === this.NODE;
        },
        /** Add a symbolic node to this frame */
        'addNode': function addNode(uri) {
          this.addSymbol(this.NODE, uri);
          if (this.isTripleToLoad()) {
            this.loadTriple();
          }
        },
        /** Add a collection node to this frame */
        'addCollection': function addCollection() {
          this.nodeType = this.NODE;
          this.node = this.store.collection();
          this.collection = true;
          if (this.isTripleToLoad()) {
            this.loadTriple();
          }
        },
        /** Add a collection arc to this frame */
        'addCollectionArc': function addCollectionArc() {
          this.nodeType = this.ARC;
        },
        /** Add a bnode to this frame */
        'addBNode': function addBNode(id) {
          if (id != null) {
            // the same nodeID always maps to the same blank node within one parse
            if (this.parser.bnodes[id] != null) {
              this.node = this.parser.bnodes[id];
            } else {
              this.node = this.parser.bnodes[id] = this.store.bnode();
            }
          } else {
            this.node = this.store.bnode();
          }
          this.nodeType = this.NODE;
          if (this.isTripleToLoad()) {
            this.loadTriple();
          }
        },
        /** Add an arc or property to this frame */
        'addArc': function addArc(uri) {
          if (uri === RDFParser.ns.RDF + 'li') {
            // rdf:li is rewritten to rdf:_n using the parent's running counter
            uri = RDFParser.ns.RDF + '_' + this.parent.listIndex;
            this.parent.listIndex++;
          }
          this.addSymbol(this.ARC, uri);
        },
        /** Add a literal to this frame */
        'addLiteral': function addLiteral(value) {
          if (this.parent.datatype && this.parent.datatype !== RDFParser.ns.RDF + 'langString') {
            this.node = this.store.literal(value, this.store.sym(this.parent.datatype));
          } else {
            // no datatype (or rdf:langString): pass the frame's language instead
            this.node = this.store.literal(value, this.lang);
          }
          this.nodeType = this.NODE;
          if (this.isTripleToLoad()) {
            this.loadTriple();
          }
        }
      };
    }

    // from the OpenLayers source .. needed to get around IE problems.
  }, {
    key: "getAttributeNodeNS",
    value:
    /**
     * Look up a namespaced attribute node on an element, falling back to a
     * manual scan of `node.attributes` when the DOM implementation has no
     * `getAttributeNodeNS` method.
     *
     * @param {Element} node The element to inspect
     * @param {String} uri The namespace URI of the attribute
     * @param {String} name The local name of the attribute
     * @returns {Attr|null} The matching attribute node, or null if none matched
     */
    function getAttributeNodeNS(node, uri, name) {
      var attributeNode = null;
      if (node.getAttributeNodeNS) {
        attributeNode = node.getAttributeNodeNS(uri, name);
      } else {
        var attributes = node.attributes;
        var potentialNode, fullName;
        for (var i = 0; i < attributes.length; ++i) {
          potentialNode = attributes[i];
          if (potentialNode.namespaceURI === uri) {
            // rebuild the qualified name and compare it with nodeName
            fullName = potentialNode.prefix ? potentialNode.prefix + ':' + name : name;
            if (fullName === potentialNode.nodeName) {
              attributeNode = potentialNode;
              break;
            }
          }
        }
      }
      return attributeNode;
    }

    /**
     * Build our initial scope frame and parse the DOM into triples
     * @param {HTMLDocument} document The DOM to parse
     * @param {String} base The base URL to use
     * @param {Object} why The context to which this resource belongs
     * @returns {Boolean} Always true when parsing completes
     * @throws {Error} If no root element can be found in `document`
     */
  }, {
    key: "parse",
    value: function parse(document, base, why) {
      var children = document.childNodes;

      // clean up for the next run
      this.cleanParser();

      // figure out the root element
      var root;
      if (document.nodeType === RDFParser.nodeType.DOCUMENT) {
        for (var c = 0; c < children.length; c++) {
          if (children[c].nodeType === RDFParser.nodeType.ELEMENT) {
            root = children[c];
            break;
          }
        }
      } else if (document.nodeType === RDFParser.nodeType.ELEMENT) {
        root = document;
      }
      // Covers both an unsupported node type and a document with no element
      // child; without this check the latter failed later inside parseDOM
      // with a TypeError on `dom.attributes`.
      if (!root) {
        throw new Error("RDFParser: can't find root in " + base + '. Halting. ');
      }
      this.why = why;

      // our topmost frame
      var f = this.frameFactory(this);
      this.base = base;
      f.base = base;
      f.lang = null; // was '' but can't have langs like that 2015 (!)
      this.parseDOM(this.buildFrame(f, root));
      return true;
    }
  }, {
    key: "parseDOM",
    value:
    /**
     * Walk the DOM starting at `frame.element`, alternating between node
     * elements and property (arc) elements, and hand the resulting triples
     * to the store. The walk is iterative: each pass of the outer loop
     * handles one DOM node, then the inner "dig" loop picks the next frame.
     *
     * Attributes that have been interpreted are removed from the DOM, so
     * the document passed in is modified.
     *
     * @param {Object} frame The frame for the root element (from buildFrame)
     * @throws {Error} On an element/attribute without a namespace, or a node
     *   carrying both rdf:ID and rdf:about
     */
    function parseDOM(frame) {
      // a DOM utility function used in parsing
      var rdfid;
      var elementURI = function (el) {
        var result = '';
        if (el.namespaceURI == null) {
          throw new Error('RDF/XML syntax error: No namespace for ' + el.localName + ' in ' + this.base);
        }
        if (el.namespaceURI) {
          result = result + el.namespaceURI;
        }
        if (el.localName) {
          result = result + el.localName;
        } else if (el.nodeName) {
          // no localName: take the part of nodeName after the prefix
          if (el.nodeName.indexOf(':') >= 0) result = result + el.nodeName.split(':')[1];else result = result + el.nodeName;
        }
        return result;
      }.bind(this);
      var dig = true; // if we'll dig down in the tree on the next iter
      while (frame.parent) {
        var dom = frame.element;
        var attrs = dom.attributes;
        if (dom.nodeType === RDFParser.nodeType.TEXT || dom.nodeType === RDFParser.nodeType.CDATA_SECTION) {
          // we have a literal
          if (frame.parent.nodeType === frame.NODE) {
            // must have had attributes, store as rdf:value
            frame.addArc(RDFParser.ns.RDF + 'value');
            frame = this.buildFrame(frame);
          }
          frame.addLiteral(dom.nodeValue);
        } else if (elementURI(dom) !== RDFParser.ns.RDF + 'RDF') {
          // not root
          if (frame.parent && frame.parent.collection) {
            // we're a collection element
            frame.addCollectionArc();
            frame = this.buildFrame(frame, frame.element);
            frame.parent.element = null;
          }
          if (!frame.parent || !frame.parent.nodeType || frame.parent.nodeType === frame.ARC) {
            // we need a node
            var about = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'about');
            rdfid = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'ID');
            if (about && rdfid) {
              throw new Error('RDFParser: ' + dom.nodeName + ' has both rdf:id and rdf:about.' + ' Halting. Only one of these' + ' properties may be specified on a' + ' node.');
            }
            if (!about && rdfid) {
              frame.addNode('#' + rdfid.nodeValue);
              dom.removeAttributeNode(rdfid);
            } else if (about == null && rdfid == null) {
              var bnid = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'nodeID');
              if (bnid) {
                frame.addBNode(bnid.nodeValue);
                dom.removeAttributeNode(bnid);
              } else {
                frame.addBNode();
              }
            } else {
              frame.addNode(about.nodeValue);
              dom.removeAttributeNode(about);
            }

            // Typed nodes
            var rdfTypePredicate = this.store.sym(RDFParser.ns.RDF + 'type');
            var typeAttr = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'type');
            var domURI = elementURI(dom);
            if (RDFParser.ns.RDF + 'Description' !== domURI) {
              // the element name itself is the type of the node
              this.store.add(frame.node, rdfTypePredicate, this.store.sym(uriUtil.join(domURI, frame.base)), this.why);
            }
            if (typeAttr != null) {
              // An rdf:type attribute names a resource. Handle it here (also
              // on typed node elements) and remove it, so the generic
              // property-attribute loop below does not emit it as a literal.
              this.store.add(frame.node, rdfTypePredicate, this.store.sym(uriUtil.join(typeAttr.nodeValue, frame.base)), this.why);
              dom.removeAttributeNode(typeAttr);
            }

            // Property Attributes
            for (var x = attrs.length - 1; x >= 0; x--) {
              this.store.add(frame.node, this.store.sym(elementURI(attrs[x])), this.store.literal(attrs[x].nodeValue, frame.lang), this.why);
            }
          } else {
            // we should add an arc (or implicit bnode+arc)
            frame.addArc(elementURI(dom));

            // save the arc's rdf:ID if it has one
            if (this.reify) {
              rdfid = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'ID');
              if (rdfid) {
                frame.rdfid = rdfid.nodeValue;
                dom.removeAttributeNode(rdfid);
              }
            }
            var parsetype = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'parseType');
            var datatype = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'datatype');
            if (datatype) {
              frame.datatype = datatype.nodeValue;
              dom.removeAttributeNode(datatype);
            }
            if (parsetype) {
              var nv = parsetype.nodeValue;
              if (nv === 'Literal') {
                frame.datatype = RDFParser.ns.RDF + 'XMLLiteral';
                frame = this.buildFrame(frame);
                // Don't include the literal node, only its children
                // see https://github.com/linkeddata/rdflib.js/issues/75
                frame.addLiteral(dom.innerHTML || dom.childNodes);
                // the children were consumed as the literal; do not descend
                dig = false;
              } else if (nv === 'Resource') {
                frame = this.buildFrame(frame, frame.element);
                frame.parent.element = null;
                frame.addBNode();
              } else if (nv === 'Collection') {
                frame = this.buildFrame(frame, frame.element);
                frame.parent.element = null;
                frame.addCollection();
              }
              dom.removeAttributeNode(parsetype);
            }
            if (attrs.length !== 0) {
              // remaining attributes describe the object of this arc
              var resource = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'resource');
              var bnid2 = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'nodeID');
              frame = this.buildFrame(frame);
              if (resource) {
                frame.addNode(resource.nodeValue);
                dom.removeAttributeNode(resource);
              } else {
                if (bnid2) {
                  frame.addBNode(bnid2.nodeValue);
                  dom.removeAttributeNode(bnid2);
                } else {
                  frame.addBNode();
                }
              }
              for (var x1 = attrs.length - 1; x1 >= 0; x1--) {
                var f = this.buildFrame(frame);
                f.addArc(elementURI(attrs[x1]));
                if (elementURI(attrs[x1]) === RDFParser.ns.RDF + 'type') {
                  this.buildFrame(f).addNode(attrs[x1].nodeValue);
                } else {
                  this.buildFrame(f).addLiteral(attrs[x1].nodeValue);
                }
              }
            } else if (dom.childNodes.length === 0) {
              // empty property element: the object is the empty literal
              this.buildFrame(frame).addLiteral('');
            }
          }
        } // rdf:RDF

        // dig dug
        dom = frame.element;
        while (frame.parent) {
          var pframe = frame;
          // climb past frames that have no element of their own
          while (dom == null) {
            frame = frame.parent;
            dom = frame.element;
          }
          var candidate = dom.childNodes && dom.childNodes[frame.lastChild];
          if (!candidate || !dig) {
            // no more children here (or told not to descend): close and go up
            frame.terminateFrame();
            if (!(frame = frame.parent)) {
              break;
            } // done
            dom = frame.element;
            dig = true;
          } else if (candidate.nodeType !== RDFParser.nodeType.ELEMENT && candidate.nodeType !== RDFParser.nodeType.TEXT && candidate.nodeType !== RDFParser.nodeType.CDATA_SECTION || (candidate.nodeType === RDFParser.nodeType.TEXT || candidate.nodeType === RDFParser.nodeType.CDATA_SECTION) && dom.childNodes.length !== 1) {
            // skip nodes of other types, and text/CDATA that is not the only child
            frame.lastChild++;
          } else {
            // not a leaf
            frame.lastChild++;
            frame = this.buildFrame(pframe, dom.childNodes[frame.lastChild - 1]);
            break;
          }
        }
      } // while
    }

    /**
     * Cleans out state from a previous parse run
     * @private
     */
  }, {
    key: "cleanParser",
    value: function cleanParser() {
      this.bnodes = {};
      this.why = null;
    }

    /**
     * Builds scope frame
     *
     * The new frame inherits base and lang from `parent`; xml:base and
     * xml:lang on `element` override them. Every attribute whose name
     * starts with "xml" is removed from `element`, and each `xmlns:prefix`
     * declaration is passed to the store via `setPrefixForURI`.
     *
     * @param {Object} parent The enclosing frame
     * @param {Node} [element] The DOM node the new frame stands for
     * @returns {Object} The new frame
     * @private
     */
  }, {
    key: "buildFrame",
    value: function buildFrame(parent, element) {
      var frame = this.frameFactory(this, parent, element);
      if (parent) {
        frame.base = parent.base;
        frame.lang = parent.lang;
      }
      // text-like nodes and element-less frames have no attributes to process
      if (!element || element.nodeType === RDFParser.nodeType.TEXT || element.nodeType === RDFParser.nodeType.CDATA_SECTION) {
        return frame;
      }
      var attrs = element.attributes;
      var base = element.getAttributeNode('xml:base');
      if (base != null) {
        frame.base = base.nodeValue;
        element.removeAttribute('xml:base');
      }
      var lang = element.getAttributeNode('xml:lang');
      if (lang != null) {
        frame.lang = lang.nodeValue;
        element.removeAttribute('xml:lang');
      }

      // remove all extraneous xml and xmlns attributes
      // (iterating backwards so removals do not disturb the remaining indexes)
      for (var x = attrs.length - 1; x >= 0; x--) {
        if (attrs[x].nodeName.slice(0, 3) === 'xml') {
          if (attrs[x].name.slice(0, 6) === 'xmlns:') {
            var uri = attrs[x].nodeValue;
            // alert('base for namespac attr:'+this.base)
            if (this.base) uri = uriUtil.join(uri, this.base);
            this.store.setPrefixForURI(attrs[x].name.slice(6), uri);
          }
          // alert('rdfparser: xml atribute: '+attrs[x].name) //@@
          element.removeAttributeNode(attrs[x]);
        }
      }
      return frame;
    }
  }]);
}();
_defineProperty(RDFParser, "ns", {
  'RDF': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
  'RDFS': 'http://www.w3.org/2000/01/rdf-schema#'
});
/** DOM Level 2 node type magic numbers @final
 *  @member RDFParser
 */
_defineProperty(RDFParser, "nodeType", {
  'ELEMENT': 1,
  'ATTRIBUTE': 2,
  'TEXT': 3,
  'CDATA_SECTION': 4,
  'ENTITY_REFERENCE': 5,
  'ENTITY': 6,
  'PROCESSING_INSTRUCTION': 7,
  'COMMENT': 8,
  'DOCUMENT': 9,
  'DOCUMENT_TYPE': 10,
  'DOCUMENT_FRAGMENT': 11,
  'NOTATION': 12
});
export { RDFParser as default };