/*
 * Copyright (c) 2017 Digital Bazaar, Inc. All rights reserved.
 */
'use strict';

const types = require('./types');

const api = {};
module.exports = api;

// define URL parser
// parseUri 1.2.2
// (c) Steven Levithan
// MIT License
// with local jsonld.js modifications
//
// Each parser pairs a regex with the list of property names its match groups
// are stored under; `keys[0]` ('href') receives the whole match and `keys[n]`
// receives capture group `n`.
api.parsers = {
  simple: {
    // RFC 3986 basic parts
    keys: [
      'href', 'scheme', 'authority', 'path', 'query', 'fragment'
    ],
    /* eslint-disable-next-line max-len */
    regex: /^(?:([^:\/?#]+):)?(?:\/\/([^\/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?/
  },
  full: {
    keys: [
      'href', 'protocol', 'scheme', 'authority', 'auth', 'user', 'password',
      'hostname', 'port', 'path', 'directory', 'file', 'query', 'fragment'
    ],
    // the scheme character class lists '-' last so it is a literal hyphen;
    // written as `+-.` it would be a range that also accepts ','
    /* eslint-disable-next-line max-len */
    regex: /^(([a-zA-Z][a-zA-Z0-9+.-]*):)?(?:\/\/((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?))?(?:(((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/
  }
};

/**
 * Parses a URL string into its components.
 *
 * The result has one property per key of the selected parser (`null` when
 * the corresponding part is absent) plus `normalizedPath`, which is `path`
 * with its dot segments removed. When the 'full' parser finds an explicit
 * default port (80 for http, 443 for https), the port is stripped from
 * `href` and `authority` and `port` is set to `null`.
 *
 * @param {string} str the URL to parse.
 * @param {string} [parser='full'] the name of an entry in `api.parsers`.
 *
 * @return {object} the parsed components.
 */
api.parse = (str, parser) => {
  const parsed = {};
  const o = api.parsers[parser || 'full'];
  const m = o.regex.exec(str);
  let i = o.keys.length;
  while(i--) {
    parsed[o.keys[i]] = (m[i] === undefined) ? null : m[i];
  }

  // remove default ports found in URLs
  if((parsed.scheme === 'https' && parsed.port === '443') ||
    (parsed.scheme === 'http' && parsed.port === '80')) {
    // The port is always the tail of the authority and the authority always
    // directly follows `<protocol>//` in href, so cut it out by position.
    // A plain `replace(':' + port, '')` would hit the first textual match,
    // which may be inside the userinfo (e.g. `http://a:8000@host:80/`).
    const authority = parsed.authority;
    const stripped = authority.slice(0, -(parsed.port.length + 1));
    const start = parsed.protocol.length + 2;
    parsed.href = parsed.href.slice(0, start) + stripped +
      parsed.href.slice(start + authority.length);
    parsed.authority = stripped;
    parsed.port = null;
  }

  parsed.normalizedPath = api.removeDotSegments(parsed.path);
  return parsed;
};

/**
 * Prepends a base IRI to the given relative IRI.
 *
 * @param {string|object|null} base the base IRI, either as a string or as an
 *          object already produced by `api.parse`; `null` skips processing.
 * @param {string} iri the relative IRI.
 *
 * @return {string} the absolute IRI.
 */
api.prependBase = (base, iri) => {
  // skip IRI processing
  if(base === null) {
    return iri;
  }
  // already an absolute IRI
  if(api.isAbsolute(iri)) {
    return iri;
  }

  // parse base if it is a string
  if(!base || types.isString(base)) {
    base = api.parse(base || '');
  }

  // parse given IRI
  const rel = api.parse(iri);

  // per RFC3986 5.2.2
  const transform = {
    protocol: base.protocol || ''
  };

  if(rel.authority !== null) {
    transform.authority = rel.authority;
    transform.path = rel.path;
    transform.query = rel.query;
  } else {
    transform.authority = base.authority;

    if(rel.path === '') {
      transform.path = base.path;
      if(rel.query !== null) {
        transform.query = rel.query;
      } else {
        transform.query = base.query;
      }
    } else {
      if(rel.path.startsWith('/')) {
        // IRI represents an absolute path
        transform.path = rel.path;
      } else {
        // merge paths
        let path = base.path;

        // append relative path to the end of the last directory from base
        path = path.slice(0, path.lastIndexOf('/') + 1);
        if((path.length > 0 || base.authority) && !path.endsWith('/')) {
          path += '/';
        }
        path += rel.path;

        transform.path = path;
      }
      transform.query = rel.query;
    }
  }

  if(rel.path !== '') {
    // remove slashes and dots in path
    transform.path = api.removeDotSegments(transform.path);
  }

  // construct URL
  let rval = transform.protocol;
  if(transform.authority !== null) {
    rval += '//' + transform.authority;
  }
  rval += transform.path;
  if(transform.query !== null) {
    rval += '?' + transform.query;
  }
  if(rel.fragment !== null) {
    rval += '#' + rel.fragment;
  }

  // handle empty base
  if(rval === '') {
    rval = './';
  }

  return rval;
};

/**
 * Removes a base IRI from the given absolute IRI.
 *
 * @param {string|object|null} base the base IRI, either as a string or as an
 *          object already produced by `api.parse`; `null` skips processing.
 * @param {string} iri the absolute IRI.
 *
 * @return {string} the relative IRI if relative to base, otherwise the
 *           absolute IRI.
 */
api.removeBase = (base, iri) => {
  // skip IRI processing
  if(base === null) {
    return iri;
  }

  if(!base || types.isString(base)) {
    base = api.parse(base || '');
  }

  // establish base root
  let root = '';
  if(base.href !== '') {
    root += (base.protocol || '') + '//' + (base.authority || '');
  } else if(iri.indexOf('//')) {
    // NOTE(review): `indexOf` is used for its truthiness, so this branch runs
    // whenever `iri` does NOT start with '//' (-1 and positive indexes are
    // truthy, 0 is falsy). That does not match the comment below, but it is
    // what makes an empty base return absolute IRIs untouched; left as-is --
    // confirm the intended behavior before changing it.
    // support network-path reference with empty base
    root += '//';
  }

  // IRI not relative to base
  if(!iri.startsWith(root)) {
    return iri;
  }

  // The root must end on an authority boundary. Without this check a base of
  // `http://example.com` would also claim `http://example.computer/x` or
  // `http://example.com:8080/x` and produce a relative IRI that resolves to
  // a different resource.
  const remainder = iri.slice(root.length);
  if(remainder !== '' && !'/?#'.includes(remainder[0])) {
    return iri;
  }

  // remove root from IRI and parse remainder
  const rel = api.parse(remainder);

  // remove path segments that match (do not remove last segment unless there
  // is a hash or query)
  const baseSegments = base.normalizedPath.split('/');
  const iriSegments = rel.normalizedPath.split('/');
  const last = (rel.fragment || rel.query) ? 0 : 1;
  while(baseSegments.length > 0 && iriSegments.length > last) {
    if(baseSegments[0] !== iriSegments[0]) {
      break;
    }
    baseSegments.shift();
    iriSegments.shift();
  }

  // use '../' for each non-matching base segment
  let rval = '';
  if(baseSegments.length > 0) {
    // don't count the last segment (if it ends with '/' last path doesn't
    // count and if it doesn't end with '/' it isn't a path)
    baseSegments.pop();
    for(let i = 0; i < baseSegments.length; ++i) {
      rval += '../';
    }
  }

  // prepend remaining segments
  rval += iriSegments.join('/');

  // add query and hash
  if(rel.query !== null) {
    rval += '?' + rel.query;
  }
  if(rel.fragment !== null) {
    rval += '#' + rel.fragment;
  }

  // handle empty base
  if(rval === '') {
    rval = './';
  }

  return rval;
};

/**
 * Removes dot segments from a URL path.
 *
 * @param {string} path the path to remove dot segments from.
 *
 * @return {string} the path with '.' and '..' segments resolved.
 */
api.removeDotSegments = path => {
  // RFC 3986 5.2.4 (reworked)

  // empty path shortcut
  if(path.length === 0) {
    return '';
  }

  const input = path.split('/');
  const output = [];

  while(input.length > 0) {
    const next = input.shift();
    const done = input.length === 0;

    if(next === '.') {
      if(done) {
        // ensure output has trailing /
        output.push('');
      }
      continue;
    }

    if(next === '..') {
      output.pop();
      if(done) {
        // ensure output has trailing /
        output.push('');
      }
      continue;
    }

    output.push(next);
  }

  // if path was absolute, ensure output has leading /
  if(path[0] === '/' && output.length > 0 && output[0] !== '') {
    output.unshift('');
  }
  if(output.length === 1 && output[0] === '') {
    return '/';
  }

  return output.join('/');
};

// TODO: time better isAbsolute/isRelative checks using full regexes:
// http://jmrware.com/articles/2009/uri_regexp/URI_regex.html

// regex to check for absolute IRI (starting scheme and ':') or blank node IRI
// ('-' is listed last in the character class so it is a literal hyphen and
// the class does not accidentally accept ',')
const isAbsoluteRegex = /^([A-Za-z][A-Za-z0-9+.-]*|_):[^\s]*$/;

/**
 * Returns true if the given value is an absolute IRI or blank node IRI, false
 * if not.
 * Note: This weak check only checks for a correct starting scheme.
 *
 * @param v the value to check.
 *
 * @return {boolean} true if the value is an absolute IRI, false if not.
 */
api.isAbsolute = v => types.isString(v) && isAbsoluteRegex.test(v);

/**
 * Returns true if the given value is a relative IRI, false if not.
 * Note: this is a weak check; it only tests that the value is a string.
 *
 * @param v the value to check.
 *
 * @return {boolean} true if the value is a relative IRI, false if not.
 */
api.isRelative = v => types.isString(v);