// Converting between SPARQL queries and the $rdf query API
/*

function SQuery () {
  this.terms = []
  return this
}

STerm.prototype.toString = STerm.val
SQuery.prototype.add = function (str) {this.terms.push()}*/

import log from './log'
import { Query } from './query'

/**
 * Convert the text of a SPARQL SELECT query into a Query object.
 *
 * The text is tokenized (quoted literals first, then whitespace/punctuation),
 * PREFIX declarations are collected, the tokens between SELECT and WHERE
 * become the query variables, and the tokens inside the WHERE braces are
 * turned into statements, OPTIONAL sub-formulas and FILTER constraints on
 * the query pattern.
 *
 * @param {string} SPARQL - SPARQL text that is converted to a query object.
 * @param {boolean} testMode - testing flag. When truthy the query is returned
 *   without asking `kb.fetcher` to look up the named nodes it mentions.
 * @param {Object} kb - store used as a term factory; this function calls its
 *   `variable`, `literal`, `sym`, `bnode` and `formula` methods and, when
 *   present, `kb.fetcher.lookUpThing`.
 * @returns {Query|boolean} the populated query, or `false` when the text has
 *   no SELECT / WHERE keywords or they appear in the wrong order.
 */
export default function SPARQLToQuery (SPARQL, testMode, kb) {
  // Token the SPARQL keyword `a` stands for when used as a predicate.
  const RDF_TYPE_TOKEN = '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>'

  // Variable name -> variable term. A Map, so that names such as "length"
  // or "constructor" cannot collide with built-in properties.
  const variableHash = new Map()

  /** Return the variable term for `name`, creating it on first use. */
  function makeVar (name) {
    if (!variableHash.has(name)) {
      variableHash.set(name, kb.variable(name))
    }
    return variableHash.get(name)
  }

  // term type functions
  // (each returns a truthy match result or a falsy value, not a strict boolean)

  /** True-ish when `term` is a string with at least one non-blank character. */
  function isRealText (term) {
    return (typeof term === 'string' && term.match(/[^ \n\t]/))
  }

  /** True-ish when `term` is a string starting with `?` or `$`. */
  function isVar (term) {
    return (typeof term === 'string' && term.match(/^[\?\$]/))
  }

  /**
   * Turn HTML-escaped angle brackets at the ends of a token back into
   * `<` and `>`; non-string terms are returned untouched.
   */
  function fixSymbolBrackets (term) {
    if (typeof term === 'string') {
      return term.replace(/^&lt;/, '<').replace(/&gt;$/, '>')
    } else {
      return term
    }
  }

  /** True-ish when `term` is a string of the form `<...>`. */
  function isSymbol (term) {
    return (typeof term === 'string' && term.match(/^<[^>]*>$/))
  }

  /** True-ish when `term` is a string starting with `_:` or is the empty string. */
  function isBnode (term) {
    return (typeof term === 'string' &&
      (term.match(/^_:/) || term.match(/^$/)))
  }

  /** True-ish when `term` is a string ending in `:` (a prefix declaration name). */
  function isPrefix (term) {
    return (typeof term === 'string' && term.match(/:$/))
  }

  /** True-ish when `term` is a string that looks like `prefix:local` or `:local`. */
  function isPrefixedSymbol (term) {
    return (typeof term === 'string' && term.match(/^:|^[^_][^:]*:/))
  }

  /** The part of `term` before its first colon. */
  function getPrefix (term) {
    return term.split(':')[0]
  }

  /**
   * The part of `term` after its first colon. Everything after that colon is
   * kept, so local names that themselves contain `:` are not truncated.
   */
  function getSuffix (term) {
    return term.slice(term.indexOf(':') + 1)
  }

  /** Strip the surrounding `<` `>` from a symbol token; other terms pass through. */
  function removeBrackets (term) {
    if (isSymbol(term)) {
      return term.slice(1, term.length - 1)
    } else {
      return term
    }
  }

  /**
   * Takes a string and returns an array of strings and Literals in the place
   * of literals.
   *
   * The first quote character found (single or double) opens a literal that
   * runs to the next occurrence of the same quote character. A `^^datatype`
   * or `@lang` suffix directly after the closing quote is attached to the
   * literal. The remainder of the string is processed recursively.
   *
   * @param {string} str
   * @returns {Array} alternating plain-text pieces and literal terms; always
   *   an array, also when the quote is unterminated.
   */
  function parseLiterals (str) {
    const sin = str.indexOf("'")
    const doub = str.indexOf('"')
    // Compare against -1 rather than testing truthiness: a quote at index 0
    // is a perfectly valid position.
    if (sin === -1 && doub === -1) {
      return [str]
    }
    const useDouble = sin === -1 || (doub !== -1 && doub < sin)
    const br = useDouble ? '"' : "'"
    const ind = useDouble ? doub : sin

    const end = str.slice(ind + 1).indexOf(br)
    if (end === -1) {
      log.error('SPARQL parsing error: no matching parentheses in literal ' + str)
      // Hand the text back as a single piece so callers can keep treating
      // the result as an array.
      return [str]
    }

    const lexical = str.slice(ind + 1, ind + 1 + end)
    // Everything after the closing quote.
    const suffix = str.slice(ind + end + 2)
    // The datatype / language tag runs up to the next space, or to the end
    // of the text when there is none.
    let end2 = suffix.indexOf(' ')
    if (end2 === -1) {
      end2 = suffix.length
    }

    let literal
    let rest
    if (suffix.match(/^\^\^/)) {
      literal = kb.literal(
        lexical,
        kb.sym(removeBrackets(suffix.slice(2, end2)))
      )
      rest = suffix.slice(end2 + 1)
    } else if (suffix.match(/^@/)) {
      literal = kb.literal(lexical, suffix.slice(1, end2), null)
      rest = suffix.slice(end2)
    } else {
      literal = kb.literal(lexical)
      log.info('Literal found: ' + literal)
      rest = suffix
    }
    // finds any other literals
    return [str.slice(0, ind), literal].concat(parseLiterals(rest))
  }

  /**
   * Pad brackets, braces, IRI delimiters and `;` `.` `,` separators with
   * spaces, then split on spaces and drop blank pieces.
   *
   * @param {string} str - text containing no quoted literals.
   * @returns {string[]} the non-blank tokens.
   */
  function spaceDelimit (str) {
    str = str.replace(/\(/g, ' ( ')
      .replace(/\)/g, ' ) ')
      .replace(/</g, ' <')
      .replace(/>/g, '> ')
      .replace(/{/g, ' { ')
      .replace(/}/g, ' } ')
      .replace(/[\t\n\r]/g, ' ')
      .replace(/; /g, ' ; ')
      .replace(/\. /g, ' . ')
      .replace(/, /g, ' , ')
    log.info('New str into spaceDelimit: \n' + str)
    return str.split(' ').filter(function (piece) {
      return isRealText(piece)
    })
  }

  /**
   * Rewrite keyword tokens in place: `a` becomes the rdf:type IRI token, and
   * `S is P of O` is rewritten to `O P S`.
   *
   * @param {Array} input - token array (modified and returned).
   */
  function replaceKeywords (input) {
    const strarr = input
    for (let x = 0; x < strarr.length; x++) {
      if (strarr[x] === 'a') {
        strarr[x] = RDF_TYPE_TOKEN
      }
      if (strarr[x] === 'is' && strarr[x + 2] === 'of') {
        strarr.splice(x, 1) // drop 'is'; the predicate is now at x
        strarr.splice(x + 1, 1) // drop 'of'
        // swap subject and object around the predicate
        const s = strarr[x - 1]
        strarr[x - 1] = strarr[x + 1]
        strarr[x + 1] = s
      }
    }
    return strarr
  }

  /**
   * Map string tokens onto terms: variables, blank nodes, symbols and
   * prefixed names are converted via `kb`; anything else (punctuation,
   * keywords, already-built terms) is passed through unchanged.
   */
  function toTerms (input) {
    const res = []
    for (let x = 0; x < input.length; x++) {
      if (typeof input[x] !== 'string') {
        res[x] = input[x]
        continue
      }
      const token = fixSymbolBrackets(input[x])
      if (isVar(token)) {
        res[x] = makeVar(token.slice(1))
      } else if (isBnode(token)) {
        log.info(token + ' was identified as a bnode.')
        res[x] = kb.bnode()
      } else if (isSymbol(token)) {
        log.info(token + ' was identified as a symbol.')
        res[x] = kb.sym(removeBrackets(token))
      } else if (isPrefixedSymbol(token)) {
        log.info(token + ' was identified as a prefixed symbol')
        const namespace = prefixes[getPrefix(token)]
        if (namespace) {
          res[x] = kb.sym(namespace + getSuffix(token))
        } else {
          log.error('SPARQL error: ' + token + ' with prefix ' +
            getPrefix(token) + ' does not have a correct prefix entry.')
          res[x] = token
        }
      } else {
        res[x] = token
      }
    }
    return res
  }

  /** Split SPARQL text into a flat array of string tokens and literal terms. */
  function tokenize (str) {
    let tokens = []
    parseLiterals(str).forEach(function (piece) {
      if (typeof piece === 'string') {
        tokens = tokens.concat(spaceDelimit(piece))
      } else {
        tokens.push(piece)
      }
    })
    tokens = replaceKeywords(tokens)
    log.info('SPARQL Tokens: ' + tokens)
    return tokens
  }

  /**
   * CASE-INSENSITIVE search for the string `str` among the string elements
   * of `arr`.
   *
   * @returns {number} index of the first match, or -1 when there is none.
   */
  function arrayIndexOf (str, arr) {
    const wanted = str.toLowerCase()
    for (let i = 0; i < arr.length; i++) {
      if (typeof arr[i] === 'string' && arr[i].toLowerCase() === wanted) {
        return i
      }
    }
    return -1
  }

  /**
   * CASE-INSENSITIVE search returning the indices of every string element
   * of `arr` equal to `str`.
   */
  function arrayIndicesOf (str, arr) {
    const wanted = str.toLowerCase()
    const ind = []
    for (let i = 0; i < arr.length; i++) {
      if (typeof arr[i] === 'string' && arr[i].toLowerCase() === wanted) {
        ind.push(i)
      }
    }
    return ind
  }

  /** Register each `?name` / `$name` token of the SELECT clause on `query.vars`. */
  function setVars (input, query) {
    log.info('SPARQL vars: ' + input)
    input.forEach(function (token) {
      if (isVar(token)) {
        log.info('Added ' + token + ' to query variables from SPARQL')
        const v = makeVar(token.slice(1))
        query.vars.push(v)
        v.label = token.slice(1)
      } else {
        log.warn('Incorrect SPARQL variable in SELECT: ' + token)
      }
    })
  }

  /**
   * Collect `PREFIX name: <iri>` declarations from the token array.
   *
   * @returns {Object} prefix name -> namespace IRI. The object has no
   *   prototype, so any prefix name is safe to use as a key.
   */
  function getPrefixDeclarations (input) {
    const res = Object.create(null)
    arrayIndicesOf('PREFIX', input).forEach(function (at) {
      const a = input[at + 1]
      const b = input[at + 2]
      if (!isPrefix(a)) {
        log.error('Invalid SPARQL prefix: ' + a)
      } else if (!isSymbol(b)) {
        log.error('Invalid SPARQL symbol: ' + b)
      } else {
        log.info('Prefix found: ' + a + ' -> ' + b)
        res[getPrefix(a)] = removeBrackets(b)
      }
    })
    return res
  }

  /**
   * Find the bracket that closes a group whose opening bracket has already
   * been consumed, honouring nested pairs.
   *
   * @param {Array} arr - tokens following the opening bracket.
   * @returns {number} index in `arr` of the closing bracket (0 for an empty
   *   group), or -1 when the group is never closed.
   */
  function getMatchingBracket (arr, open, close) {
    log.info('Looking for a close bracket of type ' + close + ' in ' + arr)
    let depth = 0
    for (let i = 0; i < arr.length; i++) {
      if (arr[i] === open) {
        depth++
      }
      if (arr[i] === close) {
        depth--
      }
      if (depth < 0) {
        return i
      }
    }
    log.error('Statement had no close parenthesis in SPARQL query')
    return -1
  }

  /** Constraint passing terms greater than `value` (numeric when the term looks numeric). */
  function ConstraintGreaterThan (value) {
    this.describe = function (varstr) {
      return varstr + ' > ' + value.toNT()
    }
    this.test = function (term) {
      if (term.value.match(/[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?/)) {
        return (parseFloat(term.value) > parseFloat(value))
      } else {
        return (term.toNT() > value.toNT())
      }
    }
    return this
  }

  /**
   * Constraint passing terms less than `value`.
   * This is not the recommended usage. Should only work on literal, numeric,
   * dateTime.
   */
  function ConstraintLessThan (value) {
    this.describe = function (varstr) {
      return varstr + ' < ' + value.toNT()
    }
    this.test = function (term) {
      if (term.value.match(/[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?/)) {
        return (parseFloat(term.value) < parseFloat(value))
      } else {
        return (term.toNT() < value.toNT())
      }
    }
    return this
  }

  /** Constraint passing terms equal to `value`. This should only work on literals but doesn't. */
  function ConstraintEqualTo (value) {
    this.describe = function (varstr) {
      return varstr + ' = ' + value.toNT()
    }
    this.test = function (term) {
      return value.equals(term)
    }
    return this
  }

  /** Constraint passing terms whose value matches the regular expression source `value`. */
  function ConstraintRegexp (value) {
    this.describe = function (varstr) {
      return "REGEXP( '" + value + "' , " + varstr + ' )'
    }
    this.test = function (term) {
      const rg = new RegExp(value)
      if (term.value) {
        return rg.test(term.value)
      } else {
        return false
      }
    }
  }

  /**
   * Record the constraint described by the tokens of one FILTER group on
   * `pat.constraints`, keyed by the constrained variable.
   *
   * Recognised forms: `?var = term`, `?var > term`, `?var < term` (term being
   * a NamedNode or Literal) and `regexp ( "pattern" , ?var )`.
   */
  function setConstraint (input, pat) {
    if (input.length === 3 && input[0].termType === 'Variable' &&
      (input[2].termType === 'NamedNode' || input[2].termType === 'Literal')) {
      if (input[1] === '=') {
        log.debug('Constraint added: ' + input)
        pat.constraints[input[0]] = new ConstraintEqualTo(input[2])
      } else if (input[1] === '>') {
        log.debug('Constraint added: ' + input)
        pat.constraints[input[0]] = new ConstraintGreaterThan(input[2])
      } else if (input[1] === '<') {
        log.debug('Constraint added: ' + input)
        pat.constraints[input[0]] = new ConstraintLessThan(input[2])
      } else {
        log.warn("I don't know how to handle the constraint: " + input)
      }
    } else if (input.length === 6 && typeof input[0] === 'string' &&
      input[0].toLowerCase() === 'regexp' &&
      input[1] === '(' && input[5] === ')' && input[3] === ',' &&
      input[4].termType === 'Variable' && input[2].termType === 'Literal') {
      log.debug('Constraint added: ' + input)
      pat.constraints[input[4]] = new ConstraintRegexp(input[2].value)
    }
  }

  /** Build a sub-formula from the tokens of one OPTIONAL group and append it to `pat.optional`. */
  function setOptional (terms, pat) {
    log.debug('Optional query: ' + terms + ' not yet implemented.')
    const opt = kb.formula()
    setWhere(terms, opt)
    pat.optional.push(opt)
  }

  /**
   * Process the tokens of a group graph pattern: peel off every OPTIONAL
   * group and FILTER expression, then add the remaining triples to `pat`.
   */
  function setWhere (input, pat) {
    const terms = toTerms(input)
    log.debug('WHERE: ' + terms)

    let opt
    while ((opt = arrayIndexOf('OPTIONAL', terms)) !== -1) {
      log.debug('OPT: ' + opt + ' ' + terms[opt] + ' in ' + terms)
      if (terms[opt + 1] !== '{') {
        log.warn('Bad optional opening bracket in word ' + opt)
      }
      const end = getMatchingBracket(terms.slice(opt + 2), '{', '}')
      if (end === -1) {
        log.error('No matching bracket in word ' + opt)
        // The group never closes: discard it together with everything after
        // it, which also guarantees the loop terminates.
        terms.splice(opt)
      } else {
        setOptional(terms.slice(opt + 2, opt + 2 + end), pat)
        // keyword + opening bracket + `end` body tokens + closing bracket
        terms.splice(opt, end + 3)
      }
    }
    log.debug('WHERE after optionals: ' + terms)

    let filt
    while ((filt = arrayIndexOf('FILTER', terms)) !== -1) {
      if (terms[filt + 1] !== '(') {
        log.warn('Bad filter opening bracket in word ' + filt)
      }
      const end = getMatchingBracket(terms.slice(filt + 2), '(', ')')
      if (end === -1) {
        log.error('No matching bracket in word ' + filt)
        terms.splice(filt)
      } else {
        setConstraint(terms.slice(filt + 2, filt + 2 + end), pat)
        terms.splice(filt, end + 3)
      }
    }
    log.debug('WHERE after filters and optionals: ' + terms)
    extractStatements(terms, pat)
  }

  /** Split `tokens` into the runs lying between occurrences of the string `separator`. */
  function splitOn (separator, tokens) {
    const parts = []
    let start = 0
    arrayIndicesOf(separator, tokens).forEach(function (at) {
      parts.push(tokens.slice(start, at))
      start = at + 1
    })
    parts.push(tokens.slice(start))
    return parts
  }

  /**
   * Add the triples described by `terms` to `formula`.
   *
   * Statements are separated by `.`; within a statement `;` starts a new
   * predicate for the same subject and `,` a new object for the same subject
   * and predicate. Empty segments (e.g. after a trailing separator) are
   * skipped, and a final statement without a closing `.` is still added.
   */
  function extractStatements (terms, formula) {
    splitOn('.', terms).forEach(function (statement, i) {
      if (statement.length === 0) {
        return
      }
      log.info('s+p+o ' + i + ' = ' + statement)
      const subj = statement[0]
      splitOn(';', statement.slice(1)).forEach(function (group, j) {
        if (group.length === 0) {
          return
        }
        log.info('p+o ' + j + ' = ' + group)
        const pred = group[0]
        splitOn(',', group.slice(1)).forEach(function (objectTokens) {
          if (objectTokens.length === 0) {
            return
          }
          const obj = objectTokens[0]
          log.info('Subj=' + subj + ' Pred=' + pred + ' Obj=' + obj)
          formula.add(subj, pred, obj)
        })
      })
    })
  }

  // ******************************* Body of SPARQLToQuery ***************************//
  log.info('SPARQL input: \n' + SPARQL)
  const q = new Query()
  const sp = tokenize(SPARQL) // first tokenize everything
  const prefixes = getPrefixDeclarations(sp)
  if (!prefixes.rdf) {
    prefixes.rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
  }
  if (!prefixes.rdfs) {
    prefixes.rdfs = 'http://www.w3.org/2000/01/rdf-schema#'
  }
  const selectLoc = arrayIndexOf('SELECT', sp)
  const whereLoc = arrayIndexOf('WHERE', sp)
  if (selectLoc < 0 || whereLoc < 0 || selectLoc > whereLoc) {
    log.error('Invalid or nonexistent SELECT and WHERE tags in SPARQL query')
    return false
  }
  setVars(sp.slice(selectLoc + 1, whereLoc), q)

  // Skip the token right after WHERE and the last token of the query
  // (presumably the braces enclosing the pattern; they are not checked here).
  setWhere(sp.slice(whereLoc + 2, sp.length - 1), q.pat)

  if (testMode) {
    return q
  }

  // Ask the fetcher, when the store has one, to look up every named node
  // used as a subject or object.
  for (const x in q.pat.statements) {
    const st = q.pat.statements[x]
    if (st.subject.termType === 'NamedNode') {
      if (kb.fetcher) {
        kb.fetcher.lookUpThing(st.subject, 'sparql:' + st.subject)
      }
    }
    if (st.object.termType === 'NamedNode') {
      if (kb.fetcher) {
        kb.fetcher.lookUpThing(st.object, 'sparql:' + st.object)
      }
    }
  }
  return q
}