-- Module:Wikidata/Chemin/parser
local tool = require("Module:Utilitaire") local path = require "Module:Wikidata/Chemin/Path" local parser = require "Module:FParser"
local pparser = {}
--[[
grammar :
letter ::= "A" | "B" | "C" | "D" | "E" | "F" | "G"
| "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" ;
digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; space ::= " " ;
Pid ::= "P" , digit, { digit } ; Pname ::= letter, { letter | digit | space | "'" } ;
PathFirstLevel ::= pathFirstAlternative
-- Rules specific to the top level of a path, which may start from a statement instead of an item; they are variants of PathAlternative and PathSequence.
pathFirstAlternative ::= PathFirstSequence ( '|' PathFirstSequence )*
PathFirstSequence ::= ('>' PathQualifier | PathEltOrInverse ) ( '/' PathEltOrInverse | '^' PathElt )*
Path ::= PathAlternative
PathAlternative ::= PathSequence ( '|' PathSequence )*
PathSequence ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
PathElt ::= PathPrimary PathMod?
PathEltOrInverse ::= PathElt | '^' PathElt
PathMod ::= ( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
PathPrimary ::= ( Prop | 'a' | '(' Path ')'
| ( Prop | '!' PathNegatedPropertySet ) '>' PathQualifier | '!' PathNegatedPropertySet )
PathQualifier ::= ( Prop | '!' PathNegatedPropertySet | PathPropertySet )
Prop ::= IRIref | Pid | Pname
rules 95 and 96 in https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#rPathNegatedPropertySet
PathNegatedPropertySet ::= PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
PathOneInPropertySet ::= iri | 'a' | '^' ( iri | 'a' )
PathPropertySet ::= '(' Path ( '|' Path )+ ')'
For information, SPARQL property path grammar :
https://www.w3.org/TR/sparql11-property-paths/#path-syntax
TriplesSameSubjectPath ::= VarOrTerm PropertyListNotEmptyPath | TriplesNode PropertyListPath
PropertyListPath ::= PropertyListNotEmpty?
PropertyListNotEmptyPath ::= ( VerbPath | VerbSimple ) ObjectList ( ';' ( ( VerbPath | VerbSimple ) ObjectList )? )*
VerbPath ::= Path
VerbSimple ::= Var
Path ::= PathAlternative
PathAlternative ::= PathSequence ( '|' PathSequence )*
PathSequence ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
PathElt ::= PathPrimary PathMod?
PathEltOrInverse ::= PathElt | '^' PathElt
PathMod ::= ( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
PathPrimary ::= ( IRIref | 'a' | '(' Path ')' )
--]]
-- Local shortcuts to the FParser facilities used throughout this module.
local lexer = parser.lexer

-- Parser combinators.
local chain = parser.chain
local alternative = parser.alternative
local plus = parser.plus
local idop = parser.idop
local nary_op_parser = parser.nary_op_parser

-- Lexer primitives.
local lex_char = lexer.lex_char
local parse_epsilon = lexer.lex_epsilon
local lex_integer = lexer.lex_integer
-- grammar base lexer functions
-- Lexes a property identifier ("P" followed by one or more digits).
-- On success the value returned by lexer.lex_regex is tagged with
-- type "Pid" and returned; otherwise nothing is returned.
local function lex_pid(state)
    local token = lexer.lex_regex(state, "P[0-9]+")
    if not token then
        return
    end
    token.type = "Pid"
    return token
end
-- Lexes a SPARQL-style prefix made of lowercase letters and underscores
-- (the pattern also accepts an empty prefix). On success the value
-- returned by lexer.lex_regex is tagged with type "prefix" and returned;
-- otherwise nothing is returned.
local function lex_sparql_prefix(state)
    local token = lexer.lex_regex(state, "[a-z_]*")
    if not token then
        return
    end
    token.type = "prefix"
    return token
end
-- Lexes a property label: a letter followed by letters, spaces,
-- apostrophes or hyphens. On success the value returned by
-- lexer.lex_regex is tagged with type "Plabel" and returned; otherwise
-- nothing is returned.
-- NOTE(review): the grammar comment at the top of the file also allows
-- digits in Pname, which this pattern does not accept — confirm intent.
local function lex_property_name(state)
    local token = lexer.lex_regex(state, "[a-zA-Z][a-z A-Z'-]*")
    if not token then
        return
    end
    token.type = "Plabel"
    return token
end
-- PathElt ::= PathPrimary PathMod?
-- PathMod ::= ( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
--
-- Parses a primary path followed by an optional modifier. The resulting
-- node is stored in the `node` field of the returned state: either the
-- primary node itself (no modifier) or a modifier node built around it.
-- Returns nothing when the chain does not match.
-- NOTE(review): '^' is also accepted here as a postfix modifier although
-- the PathMod rule above does not list it — confirm intent.
function pparser.pathElt(state)
    local primary        -- node produced by pparser.pathPrimary
    local result_node    -- node finally attached to the result
    local lower, upper   -- bounds of a "{n}" / "{n,m}" modifier, nil otherwise

    -- Returns a step that wraps the primary node into `node_class`.
    -- The bounds are read when the step runs, not when it is built.
    local function wrap_with(node_class)
        return idop(function()
            result_node = node_class:create(primary, lower, upper)
        end)
    end

    local modifier = alternative{
        chain{ lex_char("*"), wrap_with(path.StarNode) },
        chain{ lex_char("+"), wrap_with(path.PlusNode) },
        chain{ lex_char("?"), wrap_with(path.MaybeNode) },
        chain{ lex_char("^"), wrap_with(path.InverseNode) },
        chain{
            lex_char("{"),
            lex_integer,
            idop(function(st) lower = tonumber(st.lexed) end),
            alternative{
                chain{
                    lex_char(","),
                    lex_integer,
                    idop(function(st) upper = tonumber(st.lexed) end)
                },
                chain{
                    parse_epsilon,
                    idop(function() upper = nil end)
                }
            },
            -- The node is created before the closing brace is consumed.
            wrap_with(path.BetweenNode),
            lex_char("}"),
        },
        -- No modifier at all: keep the primary node untouched.
        chain{ parse_epsilon, idop(function() result_node = primary end) }
    }

    local res = chain{
        pparser.pathPrimary,
        idop(function(st) primary = st.node end),
        modifier
    }(state)

    if not res then
        return
    end
    res.node = result_node
    return res
end
-- PathEltOrInverse ::= PathElt | '^' PathElt
--
-- Parses either a plain path element, or a path element prefixed by '^',
-- in which case the element's node is wrapped in an InverseNode.
-- Returns whatever the `alternative` combinator returns for `state`.
pparser.pathEltOrInverse = function(state)
    return alternative{
        pparser.pathElt,
        chain{
            lex_char("^"),
            pparser.pathElt,
            function(state)
                -- Build the node through the :create constructor, as done
                -- for every other node in this module (the previous code
                -- called the InverseNode table directly).
                state.node = path.InverseNode:create(state.node)
                return state
            end
        }
    }(state)
end
--[[
Tests :
plop=p.parse("P31",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop) yes property=>
P31
plop=p.parse("P31>P279", p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop) yes property=>
P279
node=>
P31
plop=p.parse("P31{1,6}",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop)
plop=p.parse("(P31|P17>P31)",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop)
yes
nodes=>
1=> property=> P31 2=> property=> P31 node=> P17
--]]
-- PathSequence ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
--
-- Built with nary_op_parser from: the parser for the first operand, the
-- parser for each following "operator + operand" step, and a function
-- that turns the accumulated value into a SequenceNode.
pparser.pathSequence = nary_op_parser(
    pparser.pathEltOrInverse,
    alternative{
        -- '/' PathEltOrInverse
        chain{
            lex_char("/"),
            pparser.pathEltOrInverse,
        },
        -- '^' PathElt : the element is wrapped in an InverseNode
        chain{
            -- Written as a plain "^": the former "\^" spelling denotes the
            -- same string under Lua 5.1 but is rejected as an invalid
            -- escape sequence by Lua 5.2 and later.
            lex_char("^"),
            pparser.pathElt,
            function(state)
                state.node = path.InverseNode:create(state.node)
                return state
            end
        }
    },
    function(acc) return path.SequenceNode:create(acc) end
)
--[[
Tests:
plop=p.parse("P31/P31+",p.pathSequence) ; t = require "Module:Tools" ; t.dump_to_console(plop) yes nodes=>
1=> property=> P31 2=> node=> property=> P31
--]]
-- PathAlternative ::= PathSequence ( '|' PathSequence )*
--
-- Built with nary_op_parser; the accumulated value is turned into an
-- AlternativeNode.
-- NOTE(review): the separator is spelled "[|]" here while other call
-- sites in this file pass "|" to lex_char — confirm both are equivalent.
do
    local function to_alternative_node(acc)
        return path.AlternativeNode:create(acc)
    end

    pparser.pathAlternative = nary_op_parser(
        pparser.pathSequence,
        chain{
            lex_char("[|]"),
            pparser.pathSequence
        },
        to_alternative_node
    )
end
--[[ plop=p.parse("P31|P17/P279+",p.pathAlternative) ; t = require "Module:Tools" ; t.dump_to_console(plop) yes nodes=>
1=> property=> P31 2=> nodes=> 1=> property=> P17 2=> node=> property=> P279
plop=p.parse("P31|P17>P31/P279+",p.pathAlternative) ; t = require "Module:Tools" ; t.dump_to_console(plop) yes nodes=>
1=> property=> P31 2=> nodes=> 1=> property=> P31 node=> P17 2=> node=> property=> P279
--]]
-- PathSequence ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
-- Builds the path node equivalent to "P31/P279*": a sequence made of the
-- property P31 followed by a starred P279.
local function instance()
    local first_step = path.PropertyNode:create("P31")
    local starred_step = path.StarNode:create(path.PropertyNode:create("P279"))
    return path.SequenceNode:create({ first_step, starred_step })
end
-- PathPrimary ::= ( Prop | '!' NegatedPropertySet ) ( '>' ( Prop | '!' NegatedPropertySet ) ) ? | 'a' | '(' Path ')'
--
-- Tries, in order: the 'a ' shorthand, a property or negated set with an
-- optional qualifier part, a parenthesised path, and a negated set.
-- On success the built node is stored in the `node` field of the
-- returned state; nothing is returned on failure.
pparser.pathPrimary = function(state)
    local node

    -- Returns a fresh step recording the node carried by the current state.
    local function capture_node()
        return idop(function(st) node = st.node end)
    end

    -- 'a' followed by a space: replaced by the path built by instance().
    local instance_branch = chain{
        lex_char('a'),
        lex_char(' '),
        idop(function() node = instance() end)
    }

    -- ( Prop | '!' NegatedPropertySet ), optionally followed by a
    -- qualifier; with a qualifier both parts are combined into a
    -- QualifiedStatementNode.
    local statement_branch = chain{
        chain{
            alternative{
                pparser.prop,
                chain{ lex_char('!'), pparser.negatedPropertySet }
            },
            capture_node()
        },
        alternative{
            chain{
                pparser.pathQualifier,
                idop(function(st)
                    node = path.QualifiedStatementNode:create(node, st.node)
                end)
            },
            parse_epsilon
        }
    }

    -- '(' Path ')'
    local group_branch = chain{
        lexer.open_parenthesis,
        pparser.path,
        capture_node(),
        lexer.close_parenthesis
    }

    -- '!' NegatedPropertySet
    -- NOTE(review): this input already seems to be handled by
    -- statement_branch (negated set followed by epsilon) — confirm
    -- whether this branch can ever be selected.
    local negated_branch = chain{
        lex_char('!'),
        pparser.negatedPropertySet,
        capture_node()
    }

    local res = alternative{
        instance_branch,
        statement_branch,
        group_branch,
        negated_branch
    }(state)

    if not res then
        return
    end
    res.node = node
    return res
end
--[[ Tests :
p.parse("a ", p.pathPrimary) => yes p.parse("!P31", p.pathPrimary) => yes p.parse("!(P31|instance of)", p.pathPrimary) => yes
--]]
-- Temporary helper (meant to be removed eventually): parses a single
-- Prop and hands it to `wrapper` as a one-element list.
-- @param wrapper function receiving the list and returning the new node
-- @return a parser built with `chain`
local function parsePropAndWrap(wrapper)
    return chain{
        pparser.prop,
        function(state)
            -- TODO: understand why the property node has to be boxed in a
            -- { node = ... } table instead of being stored directly as
            -- the first element of the list.
            local boxed = { node = state.node }
            state.node = wrapper({ boxed })
            return state
        end
    }
end
-- Builds a parser for a parenthesised, '|'-separated list of
-- PathOneInPropertySet elements: '(' ( elt ( '|' elt )* )? ')'.
-- @param final_node_creator function called with the list of element
--        nodes (possibly empty) and returning the node for the whole set
-- @return a parser function taking the parsing state
pparser.pathPropertySetParser = function(final_node_creator)
    -- Callback handed to nary_op_parser as its fourth argument: wraps a
    -- lone node in a list before calling the creator.
    -- (An alternative that was considered: storing the node under a
    -- `node` field of the first list entry.)
    local function create_from_single(node)
        return final_node_creator({ node })
    end

    -- Accepts the empty set "()" (to mimic "any qualifier allowed",
    -- the equivalent of «*»).
    local function create_empty(state)
        state.node = final_node_creator({})
        return state
    end

    return function(state)
        local element_list = nary_op_parser(
            pparser.pathOneInPropertySet,
            chain{
                lexer.lex_char("|"),
                pparser.pathOneInPropertySet
            },
            final_node_creator,
            create_from_single
        )

        -- A parsePropAndWrap(final_node_creator) alternative was once
        -- considered here for "!(P31)"-like patterns; nary_op_parser (or
        -- something else) needs fixing to handle this better. For
        -- negation, the current solution is to create a negated set
        -- holding a single property.
        return chain{
            lexer.open_parenthesis,
            alternative{
                element_list,
                chain{ parse_epsilon, create_empty }
            },
            lexer.close_parenthesis
        }(state)
    end
end
-- Builds a parser accepting either a bare Prop or a parenthesised
-- property set; in both cases the parsed list is handed to `creator`.
-- @param creator function receiving the list and returning the set node
-- @return a parser function taking the parsing state
pparser.propOrSetParser = function(creator)
    local function create_set(nodes)
        return creator(nodes)
    end

    return function(state)
        -- Case of the "!P31" pattern: when negated, the lone property
        -- still needs to be wrapped in a (negated) set.
        local bare_property = parsePropAndWrap(creator)
        local parenthesised_set = pparser.pathPropertySetParser(create_set)

        return alternative{
            bare_property,
            parenthesised_set
        }(state)
    end
end
-- PathQualifier: '>' ( Prop | '!' NegatedPropertySet | PropertySet )
--
-- Consumes the leading '>' itself, then a negated or plain property/set,
-- and wraps the resulting node in a QualifierSnakNode.
do
    local function create_negated_set(nodes)
        return path.NegatedPropertySetNode:create(nodes)
    end

    local function create_property_set(nodes)
        return path.PropertySetNode:create(nodes)
    end

    local function wrap_in_qualifier_snak(state)
        state.node = path.QualifierSnakNode:create(state.node)
        return state
    end

    pparser.pathQualifier = chain{
        lex_char(">"),
        alternative{
            chain{
                lex_char("!"),
                pparser.propOrSetParser(create_negated_set)
            },
            pparser.propOrSetParser(create_property_set)
        },
        wrap_in_qualifier_snak
    }
end
--[[
=p.parse(">!(P31|P31)",p.pathQualifier)
=p.parse(">(P31|P31)",p.pathQualifier)
=p.parse(">P31",p.pathQualifier)
=p.parse(">!P31",p.pathQualifier)
--]]
-- PathNegatedPropertySet ::= PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
--
-- Parses what follows the '!' of a negated property set (the '!' itself
-- is consumed by the callers) and produces a NegatedPropertySetNode.
-- Both forms of the rule are accepted:
--   * the parenthesised list, handled by pathPropertySetParser;
--   * a single bare PathOneInPropertySet, wrapped in a one-element list
--     like a parenthesised list holding one element.
-- The previous definition only handled the parenthesised form, so the
-- first alternative of the rule above was never accepted.
-- pathOneInPropertySet is looked up when the parser runs because it is
-- defined further down in this module.
pparser.negatedPropertySet = function(state)
    local function create_negated_set(nodes)
        return path.NegatedPropertySetNode:create(nodes)
    end

    return alternative{
        pparser.pathPropertySetParser(create_negated_set),
        chain{
            pparser.pathOneInPropertySet,
            function(state)
                state.node = create_negated_set({ state.node })
                return state
            end
        }
    }(state)
end
--[[
Tests :
p.parse("!P31",p.negatedPropertySet) p.parse("(P31|P32)",p.negatedPropertySet) => yes p.parse("P31",p.negatedPropertySet) => yes p.parse("^P31",p.negatedPropertySet) => yes p.parse("^(P31)",p.negatedPropertySet) => nope p.parse("(P31)",p.negatedPropertySet) => yes p.parse("(^P31)",p.negatedPropertySet) => yes p.parse("(^P31|a|plop)",p.negatedPropertySet) => yes
All good(?)
--]]
-- PathOneInPropertySet ::= iri | 'a' | '^' ( iri | 'a' )
--
-- Parses one member of a property set. On success the built node is
-- stored in the `node` field of the returned state; the (falsy) result
-- of the combinator is returned unchanged on failure.
pparser.pathOneInPropertySet = function(state)
    local node = {}

    -- Node of the element recognised by pElement. It used to be assigned
    -- without `local`, which created a global that nothing ever read; the
    -- outer steps read state.node instead, which the 'a' branch below
    -- does not assign itself.
    local elem

    -- iri | 'a'
    local pElement = alternative{
        chain{
            lexer.lex_char('a'),
            idop(function(state) elem = instance() end)
        },
        chain{
            pparser.prop,
            idop(function(state) elem = state.node end)
        }
    }

    -- NOTE(review): the '^' form keeps the element as is while the plain
    -- form is wrapped in an InverseNode, which looks inverted with
    -- respect to the grammar rule above. It is left untouched because
    -- the consumers of these nodes may rely on the wrapper object —
    -- confirm before swapping.
    local res = alternative{
        chain{
            lexer.lex_char("^"),
            pElement,
            idop(function(state) node = elem end)
        },
        chain{
            pElement,
            idop(function(state) node = path.InverseNode:create(elem) end)
        }
    }(state)

    if res then
        res.node = node
    end
    return res
end
-- Prop ::= IRIref | Pid | Pname
--
-- Accepts either a Pid optionally preceded by "<prefix>:", or a property
-- label. On success a PropertyNode created from the `lexed` field of the
-- result is stored in its `node` field; nothing is returned on failure.
pparser.prop = function(state)
    local optional_prefix = parser.questionmark(
        chain{
            lex_sparql_prefix,
            lex_char(":")
        }
    )

    local res = alternative{
        chain{ optional_prefix, lex_pid },
        lex_property_name
    }(state)

    if not res then
        return
    end
    res.node = path.PropertyNode:create(res.lexed)
    return res
end
--[[
Tests :
p.parse("a ", p.primary) => yes
p.parse("P31@", p.prop) => nope
p.parse("P31", p.prop) => nope
p.parse("P31>P279", p.prop) => nope
--]]
-- PathFirstSequence ::= '>' PathQualifier ( '/' PathEltOrInverse | '^' PathElt )*
--
-- Top-level sequence starting from a qualifier. pparser.pathQualifier
-- already consumes the leading '>' and already produces a
-- QualifierSnakNode, so no extra wrapping step is applied here (one
-- used to be sketched in commented-out code).
-- Only the '/' continuation is implemented; the '^' continuation of the
-- rule above is not handled.
do
    local function to_sequence_node(acc)
        return path.SequenceNode:create(acc)
    end

    local following_step = chain{
        lex_char("/"),
        pparser.pathEltOrInverse
    }

    pparser.pathFirstSequence = nary_op_parser(
        pparser.pathQualifier,
        following_step,
        to_sequence_node
    )
end
-- Path ::= PathAlternative
-- Thin wrapper delegating to pparser.pathAlternative, which is looked up
-- each time the parser runs.
function pparser.path(state)
    return pparser.pathAlternative(state)
end
-- PathFirstAlternative ::= PathFirstSequence ( '|' PathFirstSequence )* | Path
--
-- Top-level rule: a regular Path is tried first, then a '|'-separated
-- list of sequences starting from a qualifier, turned into an
-- AlternativeNode.
do
    local function to_alternative_node(acc)
        return path.AlternativeNode:create(acc)
    end

    local qualifier_rooted_paths = nary_op_parser(
        pparser.pathFirstSequence,
        chain{
            lex_char("|"),
            pparser.pathFirstSequence
        },
        to_alternative_node
    )

    pparser.pathFirstAlternative = alternative{
        pparser.path,
        qualifier_rooted_paths
    }
end
-- Parses a whole property path with the top-level rule
-- (pathFirstAlternative) and returns what parser.parse returns.
-- Raises an assertion error quoting the path when the result is falsy.
-- Example: plop = p.parse_path("P31/P31/P31>P31/P31")
function pparser.parse_path(property_path)
    local parsed = parser.parse(property_path, pparser.pathFirstAlternative)
    local failure_message = "parsing returned a nil obj on path : «" .. property_path .. "»"
    assert(parsed, failure_message)
    return parsed
end
-- Re-export of the generic FParser entry point, so that individual rules
-- can be tried from the console, e.g. p.parse("P31", p.pathElt).
pparser.parse = parser.parse

return pparser