-- Module:Wikidata/Chemin
-- Appearance
-- Documentation for this module may be created at Module:Wikidata/Chemin/doc
local datastructure = require "Module:Wikidata/Chemin/Path"
local parser = require "Module:Wikidata/Chemin/parser"
local results = require "Module:Wikidata/Chemin/Resultat"
local iter = require "Module:Iterateurs"
local props = require "Module:Propriétés"
local path = {}
--------------
-- TODO :
-- * Update the "between" path so that it handles epsilon paths better
-- * Test full path rendering
-- *
--------------
-- Definition of a PropertyPath class
-- Instances are created by PropertyPath:new(str) and carry the source string
-- ("path") plus the parsed tree ("node"); methods are looked up on this table
-- through the __index self-reference below.
local PropertyPath = {}
PropertyPath.__index = PropertyPath
--[[ Datastructure for the paths that will match a path pattern
A path matching the pattern "subclass of*" will be a chain of statements and snaks nodes.
If we got statements of the form (no qualifiers here, just subject with the main statement snak) :
* <human> <subclass of> <ape>
* <ape> <subclass of> <mammal>
* <mammal> <subclass of> <animal>
a matching path like "<human> -> <ape> -> <mammal> -> <animal>" will be represented by a linked list of "ResultNode" objects.
A result node object is a mw.wikibase "statement" standard object augmented with a few methods and a link that goes from the statement or snak to the previous node in the path.
{
<mammal> <subclass of> <animal>
"parent" = {
<ape> <subclass of> <mammal>
"parent" = {
<human> <subclass of> <ape>
"parent" = EpsilonRNode(<human>, "parent" = nil)
}
}
}
--]]
-- Local aliases for the result-node classes exported by Module:Wikidata/Chemin/Resultat.
-- ResultNode is not referenced in this part of the file; the three others are
-- used below through their `:create(value, parent)` constructor.
local ResultNode = results.ResultNode
local StatementRNode = results.StatementRNode
local QualifierRNode = results.QualifierRNode
local EpsilonRNode = results.EpsilonRNode
------------------------------------------------------------------------------------------------------
-- Turns an iterator over a "property -> list of snaks/statements" map into an
-- iterator of result nodes.
--   start_rnode              : result node the new nodes are linked to (their parent)
--   property_filter_criteria : predicate handed to iter.pair_filter to keep or
--                              drop entries of the map (presumably called with key, value)
--   snak_map_iterator        : iterator over the map (mandatory, asserted)
--   rnode_type               : class whose `:create(value, start_rnode)` wraps each value
-- The exact contract of the iter.* helpers lives in Module:Iterateurs.
local function iterate_on_snaks(
    start_rnode,
    property_filter_criteria,
    snak_map_iterator,
    rnode_type)
    assert(snak_map_iterator)

    -- wraps one raw value into a result node chained to the starting node
    local function to_rnode(value)
        return rnode_type:create(value, start_rnode)
    end

    -- keep the matching entries, drop their keys, then flatten each list of values
    local flattened = iter.flatten(
        iter.select_vals(iter.pair_filter(snak_map_iterator, property_filter_criteria)),
        iter.on_vals
    )

    return iter.pair_map(flattened, to_rnode)
end
-- Creates an iterator over all the statements of the item held by
-- `start_rnode` whose property is accepted by `property_filter_criteria`.
--   start_rnode              : result node whose `:item_value()` gives the entity id
--   property_filter_criteria : predicate used to select the properties to keep
-- Returns an iterator of StatementRNode objects whose parent is `start_rnode`.
-- When the entity cannot be loaded, or has no claims table, the iterator is
-- simply empty instead of failing with "attempt to index a nil value".
local function iterate_on_statement(start_rnode, property_filter_criteria)
    local item = mw.wikibase.getEntity(start_rnode:item_value())
    -- getEntity may return nil, and an entity may come without claims
    local claims = (item and item.claims) or {}
    return iterate_on_snaks(
        start_rnode,
        property_filter_criteria,
        iter.on_pairs(claims),
        StatementRNode
    )
end
-- Creates an iterator over the qualifier snaks of `statement` whose property is
-- accepted by `qualifier_filter_criteria`. The produced nodes are
-- QualifierRNode objects whose parent is `statement`.
local function iterate_on_statement_qualifier(statement, qualifier_filter_criteria)
    local qualifiers = statement.qualifiers
    if not qualifiers then
        -- no qualifier table when the statement has no qualifiers : empty iterator
        return function() return nil end
    end
    return iterate_on_snaks(
        statement,
        qualifier_filter_criteria,
        iter.on_pairs(qualifiers),
        QualifierRNode
    )
end
-- Creates an iterator over the best statements of property `pid` for the item
-- held by `start_rnode`. `pid` goes through props.normalize first (presumably
-- so that labels can be used in place of a Pid -- see Module:Propriétés).
-- Each statement is wrapped in a StatementRNode whose parent is `start_rnode`.
local iterate_on_statement_from_property = function(start_rnode, pid)
    local subject = start_rnode:item_value()
    local best_statements = mw.wikibase.getBestStatements(subject, props.normalize(pid)) or {}

    -- every statement is kept : the filter accepts everything
    local function accept_all(key, val)
        return true
    end

    local function to_rnode(key, value)
        return StatementRNode:create(value, start_rnode)
    end

    local kept = iter.pair_filter(iter.on_pairs(best_statements), accept_all)
    return iter.pair_map(kept, to_rnode)
end
-- Iterator for the "*" operator : yields `start_rnode` itself, then every node
-- reachable by applying `child_pnode` repeatedly.
--   start_rnode : the result node from which we will iterate
--   child_pnode : the path within the star operator (for example P31/P31 if our node is (P31/P31)*)
--   depth       : current recursion depth, defaults to 1
--   iterated    : store of the item ids already expanded, to avoid infinite loops ;
--                 a fresh table is created when omitted
--   max         : max iteration depth, nil for no limit. A node is expanded only
--                 while depth < max, so with depth starting at 1 the results are
--                 at most max-1 applications of child_pnode away from start_rnode.
local function iterate_on_star(start_rnode, child_pnode, depth, iterated, max)
    iterated = iterated or {}
    depth = depth or 1

    --[[
        In pseudo code using a "yield" operator, the algorithm would be

        algo star(startnode)
            for each value v which match child_pnode from startnode
                yield v
                for each value vchild in star(v)
                    yield vchild
                end for
            end
        end

        But we can't use a yield operator if the "coroutine" module of lua is not activated,
        so this is expressed by composing iterators instead : "flatten" calls the closure
        below on each direct child, and the closure recursively builds the star iterator
        of that child (which yields the child first, then its own descendants).
    --]]

    -- Kept local : the original declared this helper as a global function, which
    -- leaked a name into the global environment on every call.
    local function expand(child_rnode)
        local depth_overflow = max and depth >= max
        local key = child_rnode:item_value()
        if iterated[key] or depth_overflow then
            -- already expanded, or too deep : nothing to yield for this child
            return function() end
        end
        iterated[key] = true
        return iterate_on_star(child_rnode, child_pnode, depth + 1, iterated, max)
    end

    return iter.chain(
        iter.singleton(start_rnode),
        iter.flatten(
            child_pnode:iterate(start_rnode),
            expand
        )
    )
end
-- Iterator for the "+" operator : one application of `child_pnode` from
-- `start_rnode`, followed by the "*" iteration starting at each result.
--   start_rnode : the result node from which we will iterate
--   child_pnode : the path within the plus operator
--   max_depth   : optional depth limit forwarded to iterate_on_star (nil : no limit)
local iterate_on_plus = function(start_rnode, child_pnode, max_depth)
    -- Visited set shared by all the star iterations of THIS call only.
    -- It used to be an undeclared (global) variable, so it survived from one
    -- call to the next and made later evaluations skip already seen items.
    local iterated = {}
    return iter.flatten(
        child_pnode:iterate(start_rnode),
        function(rnode)
            return iterate_on_star(rnode, child_pnode, 1, iterated, max_depth)
        end
    )
end
--[[
Test :
p.test("Q5", "subclass of+") -- at the time writing, "Q215627" is the only direct superclass of human. It does not show up, but there is numerous superclass in the result
--]]
--[[ an iterator to handle "/" operator sequences, for example "P31/P279*"
"creators" is a table of path nodes, each one able to create an iterator of child nodes
through its `:iterate(rnode)` method.
In our example, the first creators element will create, from a result node, an iterator
over the P31-statements of its item ; the second one will create an iterator over the
path "P279*" and so on.
The resulting iterator yields, for each element produced by the first creator, every
element produced by the rest of the sequence started from that element.

Parameters :
* start_rnode : the result node the sequence starts from
* creators    : the sequence of path nodes
* i           : index in "creators" of the node to handle first ; defaults to 1
                (also when it is not convertible to a number)
Nothing is returned when i is greater than #creators.
--]]
local function iterate_on_iterator_creators(start_rnode, creators, i)
i = i or 1
if not(tonumber(i)) then i = 1 end
-- main iterator : the iterator that will iterate on the values on this node of the path
local main_iterator = creators[i]:iterate(start_rnode)
if i < #creators then
-- trying to initialize the iterator for the next node with a value of the current one, if we can
local rnode = main_iterator()
while rnode and not(rnode:has_an_item()) do
rnode = main_iterator()
end
-- could not initialize the next iterator with a proper item ; returning the empty iterator function
if not rnode then return function() return end end
-- we found a proper value to iterate on for the next node in the path
-- final iterator : the iterator over the rest of the sequence (from node i+1),
-- started from the value currently pulled from the main iterator
local final_iterator = iterate_on_iterator_creators(rnode, creators, i+1)
return function()
while final_iterator ~= nil do
-- pulling the element from the next node iterator in the sequence
local final_elem = final_iterator()
if final_elem then
return final_elem
else
-- we pulled the last elem for this value, getting a new value
-- for this node path and regenerate the next node iterator to pull new final values
local rnode_value = main_iterator()
-- the path can only be continued from a value that holds an item ; for any
-- other value final_iterator is left as it is and the loop goes on
-- (this relies on an exhausted iterator returning nil again when called)
if rnode_value then
if rnode_value:has_an_item() then
final_iterator = iterate_on_iterator_creators(rnode_value, creators, i+1)
end
else
--we're over, no next value for this node to continue the path
return
end
end
end
end
elseif i == #creators then
-- last node of the sequence : its values are the results
return main_iterator
end
end
--[[ JSBach : Q1339 ;
Testing with :
test("Q1339", "child/child")
wikidata query equivalent query :
select ?grandchild where {
wd:Q1339 wdt:P40/wdt:P40 ?grandchild
}
Adam : wd:Q70899
test("Q70899", "child/child/child")
wikidata query equivalent query :
select ?grandgrandchild where {
wd:Q70899 wdt:P40/wdt:P40/wdt:P40 ?grandgrandchild
}
--]]
-- Iterator for the "|" operator : yields the results of each alternative of
-- `pnodes` in turn, all started from `start_rnode`.
--   start_rnode : the result node every alternative starts from
--   pnodes      : non-empty sequence of path nodes (objects with an `:iterate(rnode)` method)
-- The iterator of an alternative is only created once the previous ones are exhausted.
local iterate_on_alternatives = function(start_rnode, pnodes)
    local index = 1
    local active = pnodes[index]:iterate(start_rnode)

    local function pull()
        local found = active()
        -- the current alternative is exhausted (or empty) : move on to the next ones
        while not found do
            index = index + 1
            if index > #pnodes then
                -- no alternative left : ending
                return nil
            end
            active = pnodes[index]:iterate(start_rnode)
            found = active()
        end
        return found
    end

    return pull
end
--[[
Adam's father or mother : no value of course
p.test('Q70899', "P22|P25")
JS Bach's
p.test("Q1339", "P22|P25")
--]]
-- Iterator for the "{min,max}" operator : `pnode` applied at least `min` times
-- and, when `max` is given, at most `max` times.
--   start_rnode : the result node from which we will iterate
--   pnode       : the repeated path node
--   min         : mandatory number of repetitions (nil is treated as 0)
--   max         : optional upper bound ; when nil only the `min` mandatory
--                 repetitions are performed
--                 NOTE(review): confirm what the parser means by a missing max
--                 ("exactly min" or "min or more").
-- Fixes over the previous version :
--   * the mandatory sequence is walked from its first node (it was started at
--     index `min`, i.e. directly on its last node, so only one step was done) ;
--   * the sequence gets `start_rnode` when there is no max (it received nil) ;
--   * the visited set is no longer read from an undeclared global : each star
--     iteration creates its own ;
--   * min = 0 yields the start node itself instead of indexing an empty sequence ;
--   * iterate_on_star only expands nodes while depth < max, so it needs
--     (max - min) + 1 to allow max - min optional steps.
local function iterate_on_nodes_beetween(start_rnode, pnode, min, max)
    min = min or 0

    -- the mandatory part : `pnode` repeated `min` times
    local seq = {}
    for _ = 1, min do
        seq[#seq + 1] = pnode
    end

    local sequence_obj = {}
    function sequence_obj:iterate(next_rnode)
        if #seq == 0 then
            -- zero repetition : the epsilon path, only the node itself
            return iter.singleton(next_rnode)
        end
        return iterate_on_iterator_creators(next_rnode, seq, 1)
    end

    if not max then
        return sequence_obj:iterate(start_rnode)
    end

    -- the optional part : up to (max - min) more applications of `pnode`
    local star_obj = {}
    function star_obj:iterate(next_rnode)
        return iterate_on_star(next_rnode, pnode, 1, nil, max - min + 1)
    end

    return iterate_on_iterator_creators(
        start_rnode,
        {
            sequence_obj,
            star_obj
        }
    )
end
-- Iterator for the "?" operator : yields every result of `pnode` started from
-- `start_rnode`, then `start_rnode` itself (the "zero step" result), then stops.
--   start_rnode : the result node from which we will iterate
--   pnode       : the optional path node (object with an `:iterate(rnode)` method)
local function iterate_maybe(start_rnode, pnode)
    local inner = pnode:iterate(start_rnode)
    local exhausted = false

    return function()
        if exhausted then
            return
        end
        local candidate = inner()
        if candidate then
            return candidate
        end
        -- the inner path has no more result : the start node is the last value
        exhausted = true
        return start_rnode
    end
end
-- Compiles the path expression `str` into a PropertyPath object.
-- The object keeps the source string in `path` and the tree returned by
-- parser.parse_path in `node`. Fails (assert) when the parser returns nothing.
function PropertyPath:new(str)
    local instance = setmetatable({ path = str }, self)

    local ast = parser.parse_path(str)
    assert(ast, "parser did not return a node")

    instance.node = ast
    return instance
end
-- Returns the id of `entity` : the value itself when it is already a string,
-- its `id` field otherwise.
local function entityId(entity)
    if type(entity) ~= 'string' then
        return entity.id
    end
    return entity
end
-- Normalizes the starting point of a path iteration into a result node.
-- Accepted values :
--   * a string : used as is to create an EpsilonRNode (presumably an entity id) ;
--   * a table with a `claims` field : assumed to be an entity object, its `id` is used ;
--   * a table with a truthy `is_RNode` field : already a result node, returned unchanged ;
--   * a table with `qualifiers` or `mainsnak` : assumed to be a statement ; it is
--     wrapped in a StatementRNode whose parent is the EpsilonRNode of its item.
-- Any other value is logged and raises an error.
local function norm_start_point(start_point)
    local kind = type(start_point)
    if kind == "string" then
        return EpsilonRNode:create(start_point)
    elseif kind == "table" then
        if start_point["claims"] ~= nil then
            -- assume this is an item or entity object
            return EpsilonRNode:create(start_point.id)
        elseif start_point["is_RNode"] then
            return start_point
        elseif start_point["qualifiers"] or start_point["mainsnak"] then
            -- A statement id has the form "<entity id>$<guid>" : the item id is
            -- everything before the "$". The previous gmatch("^.*[^$]") call did
            -- not isolate it : gmatch does not treat "^" as an anchor in Lua 5.1,
            -- and the greedy ".*" would match the whole id anyway.
            local itemid = string.match(start_point.id, "^[^$]+")
            return StatementRNode:create(start_point, EpsilonRNode:create(itemid))
        end
    end
    mw.logObject(start_point)
    -- the second argument of error() is a numeric level, not a message part :
    -- the description of the faulty value is appended to the message instead
    error(
        "from function norm_start_point of module PropertyPath : wrong type for start_point : "
            .. tostring(start_point)
    ) -- TODO : Log a better error
end
-- Returns an iterator over the results of this path started from `start_point`.
-- `start_point` is first normalized into a result node by norm_start_point.
function PropertyPath:iterate(start_point)
    return self.node:iterate(norm_start_point(start_point))
end
-- Local aliases for the path-node classes exported by Module:Wikidata/Chemin/Path.
-- Each class receives an `:iterate(rnode)` method below, which is how the
-- iteration algorithms of this module are attached to the nodes of a parsed path.
local PropertyNode = datastructure.PropertyNode
local AlternativeNode = datastructure.AlternativeNode
local SequenceNode = datastructure.SequenceNode
local QualifiedStatementNode = datastructure.QualifiedStatementNode
local NegatedPropertySetNode = datastructure.NegatedPropertySetNode
local PlusNode = datastructure.PlusNode
local StarNode = datastructure.StarNode
local BetweenNode = datastructure.BetweenNode
local MaybeNode = datastructure.MaybeNode
local QualifierSnakNode = datastructure.QualifierSnakNode
-- Single property step : iterates over the best statements of `self.property`
-- for the item held by `rnode`.
function PropertyNode:iterate(rnode)
return iterate_on_statement_from_property(rnode, self.property)
end
--[[
test("Q5", "subclass of")
--]]
-- "|" operator : iterates over the results of each alternative in `self.nodes`,
-- one alternative after the other, all started from `rnode`.
function AlternativeNode:iterate(rnode)
return iterate_on_alternatives(rnode, self.nodes)
end
-- Negated property set ("!(...)") : iterates over the statements of the item
-- held by `rnode` whose property is accepted by `self:matches`.
function NegatedPropertySetNode:iterate(rnode)
    -- only the property decides ; the statements themselves are not inspected
    local function keep_property(property, val)
        return self:matches(property)
    end
    return iterate_on_statement(rnode, keep_property)
end
--[[
test("Q90", "!(P150)")
--]]
-- "/" operator : iterates over the results of the nodes of `self.nodes`
-- chained one after the other, starting from `rnode`.
function SequenceNode:iterate(rnode)
return iterate_on_iterator_creators(rnode, self.nodes)
end
-- "property>qualifier" : for every statement of the item held by `rnode` whose
-- property is accepted by `self.property:matches`, iterates over its qualifier
-- snaks whose property is accepted by `self.qualifier:matches`.
function QualifiedStatementNode:iterate(rnode)
    local function property_matches(key, value)
        return self.property:matches(key)
    end

    local function qualifier_matches(key, value)
        return self.qualifier:matches(key)
    end

    -- creates the qualifier iterator of one statement
    local function qualifiers_of(statement)
        return iterate_on_statement_qualifier(statement, qualifier_matches)
    end

    return iter.flatten(iterate_on_statement(rnode, property_matches), qualifiers_of)
end
--[[ to test with :
p.test("Q79529", "union of>of")
p.test("Q105019",'P22{1,6}')
--]]
-- ">qualifier" step : iterates over the qualifier snaks of the statement
-- `statementnode` whose property is accepted by `self:matches`.
function QualifierSnakNode:iterate(statementnode)
    local function keep_qualifier(key, value)
        return self:matches(key)
    end
    return iterate_on_statement_qualifier(statementnode, keep_qualifier)
end
--[[ to test with :
for x in p.iterate("Q79529", "union of") do p.test(x, ">of") end
--]]
-- "*" operator : `rnode` itself, then everything reachable by repeating
-- `self.node` ; no depth limit is passed.
function StarNode:iterate(rnode)
return iterate_on_star(rnode, self.node)
end
-- "+" operator : `self.node` applied one or more times from `rnode` ;
-- no depth limit is passed.
function PlusNode:iterate(rnode)
return iterate_on_plus(rnode, self.node)
end
-- "{min,max}" operator : `self.node` repeated according to the `self.min`
-- and `self.max` bounds, starting from `rnode`.
function BetweenNode:iterate(rnode)
return iterate_on_nodes_beetween(rnode, self.node, self.min, self.max)
end
-- "?" operator : the results of `self.node` from `rnode`, followed by
-- `rnode` itself.
function MaybeNode:iterate(rnode)
return iterate_maybe(rnode, self.node)
end
-- Returns an iterator on the result set of a path from a specific node.
--   start_node : the starting point (mandatory, an error is raised when nil)
--   ppath      : either a string representing a path or an already compiled
--                path (any table is treated as compiled)
function path.iterate(start_node, ppath)
    if start_node == nil then
        error("the start node is mandatory to get result on a path, it is nil")
    end

    local compiled = ppath
    if type(compiled) ~= "table" then
        -- not compiled yet : build the PropertyPath from its string form
        compiled = path.PropertyPath:new(ppath)
    end

    return compiled:iterate(start_node)
end
-- Tells whether a path matching `ppath` leads from `start_node` to `value`.
-- Returns true as soon as one result of the path has `value` as its item
-- value, false when the results are exhausted without a match.
-- (currently only works if "value" is a Qid string)
function path.matches(start_node, ppath, value)
    local found = false
    for rnode in path.iterate(start_node, ppath) do
        if rnode:item_value() == value then
            found = true
            break
        end
    end
    return found
end
----------------------------
--[[
p.test("Q5", "P279")
p.test(mw.wikibase.getEntity("Q5"), "P279")
for x in p.iterate(mw.wikibase.getEntity("Q5"), "P279") do p.test(x, "P279") end -- test if we can continue iteration of an RNode object
Complex test :
p.test("Q27929033","P1552>!()/P31") => OK
p.test("Q27929033","subclass of/P1552>!()/P31") => NOK
--]]
-- Debugging helper : logs, for each result of `ppath` started from
-- `start_point`, the marker "woot" followed by the item value of the result.
function path.test(start_point, ppath)
    for rnode in path.iterate(start_point, ppath) do
        mw.log("woot")
        -- a `false` value would not end the loop, hence the guard
        if rnode then
            local reached = rnode:item_value()
            mw.log(reached)
        end
    end
end
-----------------
-- Functions returning a statement or a qualifier snak selected by a property path.
-- Used for sorting : they provide a sort key for a statement chosen by a path
-- or by a set of paths.
--
-- path.snak_key_by_path compiles the path expression `path` once and returns a
-- function that, given a claim, returns the first result of the path started
-- from that claim (nil when there is none).
function path.snak_key_by_path(path)
    -- The parameter `path` shadows the module table of the same name, so the
    -- class cannot be reached as `path.PropertyPath` here (that would index the
    -- string and fail) : the local PropertyPath class is used directly.
    local path_to_key = PropertyPath:new(path)
    return function(claim)
        -- parentheses : keep only the first value returned by the iterator
        return (path_to_key:iterate(claim)())
    end
end
-- Takes several property paths and creates a function that returns
-- the first value of the first path that has a match.
-- example : local get_key = wd.snak_key_by_paths{">P80","P800|P801"}
-- get_key(claim)
-- returns the qualifier value of P80 of the claim if it exists, if not returns the main statement value
-- of P800 of the main value of the "claim" statement, if not the P801 one
-- (used in Module:Infobox/fonctions/personne)
-- Note on the example : TODO : would be equivalent to a single path ">P80|P800|P801" but it's not possible yet
function path.snak_key_by_paths(paths)
    -- every path is compiled once, when the key function is created
    local compiled = {}
    for _, pattern in ipairs(paths) do
        table.insert(compiled, path.PropertyPath:new(pattern))
    end

    return function(claim)
        -- the paths are tried in order ; the first one with a result wins
        for index = 1, #compiled do
            local first = compiled[index]:iterate(claim)()
            if first then
                return first
            end
        end
    end
end
----------------------------
-- Expose the PropertyPath class on the module table (the functions above reach
-- it as path.PropertyPath) and return the module.
path.PropertyPath = PropertyPath
return path