Jump to content

Module:Wikidata/Chemin

Katuka Wikipedia

Documentation for this module may be created at Module:Wikidata/Chemin/doc

local datastructure = require "Module:Wikidata/Chemin/Path"
local parser = require "Module:Wikidata/Chemin/parser"
local results = require "Module:Wikidata/Chemin/Resultat"

local iter = require "Module:Iterateurs"
local props = require "Module:Propriétés"

local path = {}

--------------
-- TODO : 
--        * Update the "between" path so that it handles epsilon paths better
--        * Test full path rendering
--        * 
--------------

-- Definition of a PropertyPath class

local PropertyPath = {}
PropertyPath.__index = PropertyPath

--[[ Datastructure for the paths that will match a path pattern
A path matching the pattern "subclass of*" will be a chain of statements and snaks nodes.  
If we got statements of the form (no qualifiers here, just subject with the main statement snak) :
* <human> <subclass of> <ape>
* <ape> <subclass of> <mammal>
* <mammal> <subclass of> <animal>

a matching path like "<human> -> <ape> -> <mammal> -> <animal>" will be represented by a linked list of "ResultNode" objects. 
A result node object is a mw.wikibase "statement" standard object augmented with a few methods and a link that goes from the statement or snak to the previous node in the path.
{ 
   <mammal> <subclass of> <animal>
   "parent" = {
      <ape> <subclass of> <mammal>
      "parent" = {
         <human> <subclass of> <ape>
         "parent" = EpsilonRNode(<human>, "parent" = nil)
      }
   }
}

--]]

local ResultNode = results.ResultNode
local StatementRNode = results.StatementRNode
local QualifierRNode = results.QualifierRNode
local EpsilonRNode = results.EpsilonRNode


------------------------------------------------------------------------------------------------------


--- Builds an iterator of result nodes from a snak map iterator.
-- The entries of the map are filtered with `property_filter_criteria`, their
-- values are selected and flattened with `iter.on_vals`, and each resulting
-- value is wrapped with `rnode_type:create(value, start_rnode)`.
-- @param start_rnode result node passed to `create` as the second argument
-- @param property_filter_criteria predicate handed to `iter.pair_filter`
-- @param snak_map_iterator iterator over the pairs of a snak map (mandatory)
-- @param rnode_type result node class providing a `create` method
-- @return the iterator returned by `iter.pair_map`
local function iterate_on_snaks(
	start_rnode,
	property_filter_criteria,
	snak_map_iterator,
	rnode_type)
	assert(snak_map_iterator)

	-- wraps one value into a result node attached to start_rnode
	local function to_rnode(value)
		return rnode_type:create(value, start_rnode)
	end

	-- the filter call stays nested so that all of its return values
	-- are forwarded to select_vals, as before
	local value_lists = iter.select_vals(
		iter.pair_filter(snak_map_iterator, property_filter_criteria)
	)
	local values = iter.flatten(value_lists, iter.on_vals)

	return iter.pair_map(values, to_rnode)
end

--- Creates an iterator over the statements of an item whose property
-- satisfies a filter.
-- @param start_rnode result node whose `item_value()` identifies the entity to load
-- @param property_filter_criteria predicate used to select the properties to keep
-- @return an iterator over StatementRNode objects; an empty iterator when the
--         entity cannot be loaded or has no claims table
local function iterate_on_statement(start_rnode, property_filter_criteria)
	local item = mw.wikibase.getEntity(start_rnode:item_value())

	-- getEntity returns nil for a missing entity, and an entity without
	-- statements has no `claims` table : nothing to iterate on in both cases
	if not item or not item.claims then
		return function() return nil end
	end

	return iterate_on_snaks(
		start_rnode,
		property_filter_criteria,
		iter.on_pairs(item.claims),
		StatementRNode
	)
end

--- Creates an iterator over the qualifier snaks of a statement.
-- @param statement statement (result node) whose `qualifiers` table is read
-- @param qualifier_filter_criteria predicate used to select the qualifier properties
-- @return an iterator over QualifierRNode objects, or an empty iterator when
--         the statement has no `qualifiers` table
local function iterate_on_statement_qualifier(statement, qualifier_filter_criteria)
	-- no qualifier table when the statement has no qualifiers
	if not statement.qualifiers then
		return function() return nil end
	end

	local qualifier_pairs = iter.on_pairs(statement.qualifiers)
	return iterate_on_snaks(
		statement,
		qualifier_filter_criteria,
		qualifier_pairs,
		QualifierRNode
	)
end

--- Creates an iterator over the best statements of one property of an item.
-- @param start_rnode result node whose `item_value()` identifies the item
-- @param pid property identifier or label, normalized with `props.normalize`
-- @return an iterator over StatementRNode objects chained to `start_rnode`
local function iterate_on_statement_from_property(start_rnode, pid)
	local best_statements = mw.wikibase.getBestStatements(
		start_rnode:item_value(),
		props.normalize(pid)
	) or {}

	-- every statement is kept : the filter accepts all pairs
	local function keep_all(key, val)
		return true
	end

	local function to_rnode(key, value)
		return StatementRNode:create(value, start_rnode)
	end

	local kept = iter.pair_filter(iter.on_pairs(best_statements), keep_all)
	return iter.pair_map(kept, to_rnode)
end


--- Iterates over the results of a starred path (the "*" operator).
-- The returned iterator chains a singleton iterator on `start_rnode` with the
-- recursive expansion of every result of `child_pnode` from `start_rnode`.
-- @param start_rnode the result node from which we will iterate
-- @param child_pnode the path within the star operator (for example P31/P31 if our node is (P31/P31)*)
-- @param depth current recursion depth, defaults to 1
-- @param iterated store of the already expanded items (keyed by item value), used to
--        avoid infinite loops ; defaults to a fresh table
-- @param max results are no longer expanded once depth >= max, nil for no limit
-- @return an iterator over result nodes
local function iterate_on_star(start_rnode, child_pnode, depth, iterated, max)
	iterated = iterated or {}
	depth = depth or 1

	--[[
	In pseudo code using a « yield » operator, the algorithm would be

	algo star(startnode)
	   for each value v which match child_pnode from startnode
	      yield v
	      for each value vchild in star(v)
	         yield vchild
	      end for
	   end
	end

	But we can’t use a yield operator if the « coroutine » module of Lua is not activated,
	so the algorithm is expressed by composing iterators : the « flatten » iterator
	together with a recursive closure that creates the iterator of each reached node
	handles the recursion implied by the « star » operator.
	--]]

	-- Creates the iterator that continues the star from one result of child_pnode.
	-- Kept local : the previous `function creator()` declaration leaked a global.
	local function expand(reached_rnode)
		local depth_overflow = max and depth >= max
		local key = reached_rnode:item_value()

		-- a nil key cannot be stored in `iterated` (it would raise "table index is nil") ;
		-- such a result, like an already visited or too deep one, is not expanded
		if key == nil or iterated[key] or depth_overflow then
			return function() end
		end

		iterated[key] = true
		return iterate_on_star(reached_rnode, child_pnode, depth + 1, iterated, max)
	end

	return iter.chain(
		iter.singleton(start_rnode),
		iter.flatten(
			child_pnode:iterate(start_rnode),
			expand
		)
	)
end

--- Iterates over the results of a "+" path (one or more repetitions of a path).
-- Every result of `child_pnode` from `start_rnode` is used as the starting
-- point of a star iteration on the same `child_pnode`.
-- @param start_rnode the result node from which we will iterate
-- @param child_pnode the path within the plus operator
-- @param max_depth optional depth limit forwarded to iterate_on_star, nil for no limit
-- @return an iterator over result nodes
local iterate_on_plus = function(start_rnode, child_pnode, max_depth)
	-- One visited set per traversal. It used to be an accidental global, so the
	-- items visited by one "+" iteration were skipped by all the following ones.
	local iterated = {}

	return iter.flatten(
		child_pnode:iterate(start_rnode),
		function(rnode)
			return iterate_on_star(rnode, child_pnode, 1, iterated, max_depth)
		end
	)
end

--[[
Test :
p.test("Q5", "subclass of+") -- at the time of writing, "Q215627" is the only direct superclass of human. It does not show up, but there are numerous superclasses in the result
--]]

--[[ an iterator to handle "/" operator sequences, for example « P31/P279* »
 "creators" is a table of objects whose "iterate" method creates an iterator over child nodes.
 In our example, the first element of "creators" takes a result node and
   returns an iterator over the P31-statements of its item ;
 the second one creates an iterator over the path « P279* », and so on.
 The resulting iterator yields, for each element returned by the first iterator,
 every element of the second iterator started from that element.
--]]


-- Iterates over the results of a sequence of path steps (the "/" operator).
-- start_rnode : the result node the sequence starts from
-- creators    : array of objects with an "iterate(rnode)" method, one per step
-- i           : index in "creators" of the step to start from ; defaults to 1,
--               and is reset to 1 when it cannot be converted to a number
-- Returns an iterator function over the result nodes of the last step.
local function iterate_on_iterator_creators(start_rnode, creators, i)
	i = i or 1
	if not(tonumber(i)) then i = 1 end
	-- main iterator : the iterator that will iterate on the values of this step of the path
	local main_iterator = creators[i]:iterate(start_rnode)
	
	if i < #creators then
		-- trying to initialize the iterator of the next step with a value of the current one, if we can
		local rnode = main_iterator()
		
		-- skip the values the path cannot be continued from (no item)
		while rnode and not(rnode:has_an_item()) do
			rnode = main_iterator()
		end

		-- could not initialize the next iterator with a proper item ; return the empty iterator function
		if not rnode then return function() return end end
		
		-- we found a proper value to iterate on for the next step in the path
		
		-- final iterator : the iterator over the remaining steps of the sequence,
		-- started from the current value of the main iterator ; it is recreated
		-- each time the main iterator yields a new usable value
		
		local final_iterator = iterate_on_iterator_creators(rnode, creators, i+1)
		return function()
			while final_iterator ~= nil do
				-- pulling the element from the iterator of the next steps in the sequence
				local final_elem = final_iterator()
				if final_elem then
					return final_elem
				else
					-- we pulled the last element for this value : get a new value 
					-- for this step and regenerate the iterator of the next steps
					
					local rnode_value = main_iterator()
					
					-- the path can only be continued from a value that has an item ;
					-- otherwise final_iterator is left unchanged and the loop pulls
					-- from it again before trying the next value of the main iterator
					
					if rnode_value then
						if rnode_value:has_an_item() then
							final_iterator = iterate_on_iterator_creators(rnode_value, creators, i+1)
						end
					else
						-- we're over, no next value for this step to continue the path
						return
					end
				end
			end
		end
	elseif i == #creators then
		-- last step of the sequence : its results are the results of the whole path
		return main_iterator
	end
end

--[[ JSBach : Q1339 ;
Testing with :
test("Q1339", "child/child")

wikidata query equivalent query : 
select ?grandchild where {
  wd:Q1339 wdt:P40/wdt:P40 ?grandchild
}


Adam : wd:Q70899 
test("Q70899", "child/child/child")
wikidata query equivalent query : 
select ?grandgrandchild where {
  wd:Q70899 wdt:P40/wdt:P40/wdt:P40 ?grandgrandchild
}
--]]

--- Iterates over the results of alternative paths (the "|" operator).
-- The alternatives are iterated one after the other, all from `start_rnode`.
-- @param start_rnode the result node every alternative starts from
-- @param pnodes array of path nodes, each with an `iterate(rnode)` method
-- @return an iterator function yielding the results of each alternative in turn
local iterate_on_alternatives = function(start_rnode, pnodes)
	local index = 1
	local active = pnodes[index]:iterate(start_rnode)

	local function next_result()
		local res = active()
		-- the active alternative is exhausted : move on to the next ones
		-- until one of them yields a result or none is left
		while not res do
			index = index + 1
			if index > #pnodes then
				return nil
			end
			active = pnodes[index]:iterate(start_rnode)
			res = active()
		end
		return res
	end

	return next_result
end

--[[
Adam's father or mother : no value of course
p.test('Q70899', "P22|P25")

JS Bach's
p.test("Q1339", "P22|P25")

--]]

--- Iterates over the results of a bounded repetition of a path ("{min,max}").
-- The path is applied `min` times in sequence ; when `max` is given, a star
-- iteration limited with `max - min` is then started from each result.
-- @param start_rnode the result node from which we will iterate
-- @param pnode the repeated path node
-- @param min number of mandatory repetitions (0 when nil)
-- @param max upper bound, nil to only iterate on the mandatory repetitions
-- @return an iterator over result nodes
local function iterate_on_nodes_beetween(start_rnode, pnode, min, max)
	min = min or 0

	-- the mandatory part : `min` times the same path node in sequence
	local seq = {}
	for _ = 1, min do
		seq[#seq + 1] = pnode
	end

	local sequence_obj = {}
	function sequence_obj:iterate(next_rnode)
		if #seq == 0 then
			-- zero repetition : the only result is the starting node itself
			return iter.singleton(next_rnode)
		end
		-- the whole sequence must be walked, starting from its first step
		-- (passing `min` as start index skipped the first min - 1 steps)
		return iterate_on_iterator_creators(next_rnode, seq)
	end

	if not max then
		-- the start node has to be forwarded, it used to be omitted
		return sequence_obj:iterate(start_rnode)
	end

	local star_obj = {}
	function star_obj:iterate(next_rnode)
		-- nil visited set : each star iteration gets its own fresh store instead
		-- of reading an accidental global.
		-- NOTE(review): iterate_on_star stops expanding once depth >= max with a
		-- depth starting at 1, so this seems to add at most max - min - 1 steps ;
		-- confirm whether the upper bound is meant to be inclusive.
		return iterate_on_star(next_rnode, pnode, 1, nil, max - min)
	end

	return iterate_on_iterator_creators(
		start_rnode,
		{
			sequence_obj,
			star_obj
		}
	)
end

--- Iterates over the results of an optional path (the "?" operator).
-- Yields every result of `pnode` from `start_rnode`, then `start_rnode`
-- itself, then nothing.
-- @param start_rnode the result node from which we will iterate
-- @param pnode the optional path node, with an `iterate(rnode)` method
-- @return an iterator function
local function iterate_maybe(start_rnode, pnode)
	local inner = pnode:iterate(start_rnode)
	local finished = false

	return function()
		if finished then
			return
		end

		local val = inner()
		if val then
			return val
		end

		-- the path is exhausted : the start node is the last result
		finished = true
		return start_rnode
	end
end

--- Compiles a property path string into a PropertyPath object.
-- @param str the textual path ; stored in the `path` field
-- @return a new object whose `node` field holds the parsed path
-- Raises an error when the parser returns no node.
function PropertyPath:new(str)
	local ast = parser.parse_path(str)
	assert(ast, "parser did not return a node")

	local obj = setmetatable({ path = str }, self)
	obj.node = ast
	return obj
end

--- Returns the identifier of an entity.
-- @param entity either an identifier string or a table with an `id` field
-- @return the string itself, or the `id` field of the table
local function entityId(entity)
	if type(entity) ~= 'string' then
		return entity.id
	end
	return entity
end


--- Normalizes the starting point of a path iteration into a result node.
-- @param start_point one of :
--   * a string, used as the value of a new EpsilonRNode ;
--   * a table with a `claims` field (assumed to be an item or entity object) ;
--   * a table with a truthy `is_RNode` field, returned unchanged ;
--   * a table with a `qualifiers` or `mainsnak` field (a statement), wrapped
--     into a StatementRNode whose parent is an EpsilonRNode on its item
-- @return a result node
-- Raises an error for any other kind of value.
local function norm_start_point(start_point)
	local kind = type(start_point)

	if kind == "string" then
		return EpsilonRNode:create(start_point)
	elseif kind == "table" then
		if start_point["claims"] ~= nil then
			-- assume this is an item or entity object
			return EpsilonRNode:create(start_point.id)
		elseif start_point["is_RNode"] then
			return start_point
		elseif start_point["qualifiers"] or start_point["mainsnak"] then
			-- The item id is the part of the statement id that precedes the "$".
			-- string.match is used because "^" does not anchor in string.gmatch
			-- (Lua 5.1), and the former greedy pattern spanned the whole id anyway.
			local statement_id = start_point.id
			local itemid = type(statement_id) == "string" and string.match(statement_id, "^[^$]+")
			if itemid then
				return StatementRNode:create(start_point, EpsilonRNode:create(itemid))
			end
		end
	end

	mw.logObject(start_point)
	-- the second argument of `error` is a numeric level, so the description of
	-- the offending value goes into the message itself
	error(
		"from function norm_start_point of module PropertyPath : wrong type for start_point"
			.. " (" .. kind .. " : " .. tostring(start_point) .. ")"
	) -- TODO : Log a better error
end

--- Iterates over the results of this path from a starting point.
-- @param start_point any value accepted by norm_start_point
-- @return the iterator created by the root node of the path
function PropertyPath:iterate(start_point)
	local start_rnode = norm_start_point(start_point)
	return self.node:iterate(start_rnode)
end

local PropertyNode = datastructure.PropertyNode
local AlternativeNode = datastructure.AlternativeNode
local SequenceNode = datastructure.SequenceNode
local QualifiedStatementNode = datastructure.QualifiedStatementNode
local NegatedPropertySetNode = datastructure.NegatedPropertySetNode
local PlusNode = datastructure.PlusNode
local StarNode = datastructure.StarNode
local BetweenNode = datastructure.BetweenNode
local MaybeNode = datastructure.MaybeNode
local QualifierSnakNode = datastructure.QualifierSnakNode

--- Iterates over the best statements of this node's property, from `rnode`.
-- @param rnode the result node to start from
-- @return an iterator over statement result nodes
function PropertyNode:iterate(rnode)
	local pid = self.property
	return iterate_on_statement_from_property(rnode, pid)
end

--[[
test("Q5", "subclass of")
--]]

--- Iterates over the results of each alternative of this node, from `rnode`.
-- @param rnode the result node to start from
-- @return an iterator over result nodes
function AlternativeNode:iterate(rnode)
	local alternatives = self.nodes
	return iterate_on_alternatives(rnode, alternatives)
end

--- Iterates over the statements of `rnode` whose property is matched by this node.
-- @param rnode the result node to start from
-- @return an iterator over statement result nodes
function NegatedPropertySetNode:iterate(rnode)
	-- only the property key is tested, the value is ignored
	local function is_matched(property, val)
		return self:matches(property)
	end

	return iterate_on_statement(rnode, is_matched)
end

--[[
test("Q90", "!(P150)")
--]]

--- Iterates over the results of the sequence of nodes of this node, from `rnode`.
-- @param rnode the result node to start from
-- @return an iterator over result nodes
function SequenceNode:iterate(rnode)
	local steps = self.nodes
	return iterate_on_iterator_creators(rnode, steps)
end

--- Iterates over the qualifiers matched by `self.qualifier` on the statements
-- of `rnode` whose property is matched by `self.property`.
-- @param rnode the result node to start from
-- @return an iterator over qualifier result nodes
function QualifiedStatementNode:iterate(rnode)
	local function property_matches(key, value)
		return self.property:matches(key)
	end

	local function qualifier_matches(key, value)
		return self.qualifier:matches(key)
	end

	-- creates the qualifier iterator of one matching statement
	local function qualifiers_of(statement)
		return iterate_on_statement_qualifier(statement, qualifier_matches)
	end

	return iter.flatten(iterate_on_statement(rnode, property_matches), qualifiers_of)
end

--[[ to test with :
p.test("Q79529", "union of>of")
p.test("Q105019",'P22{1,6}')
--]]

--- Iterates over the qualifiers of a statement that are matched by this node.
-- @param statementnode the statement result node whose qualifiers are read
-- @return an iterator over qualifier result nodes
function QualifierSnakNode:iterate(statementnode)
	-- only the qualifier property key is tested, the value is ignored
	local function is_matched(key, value)
		return self:matches(key)
	end

	return iterate_on_statement_qualifier(statementnode, is_matched)
end

--[[ to test with :
for x in p.iterate("Q79529", "union of") do p.test(x, ">of") end
--]]

--- Iterates over the results of the starred path of this node, from `rnode`.
-- @param rnode the result node to start from
-- @return an iterator over result nodes
function StarNode:iterate(rnode)
	local inner_pnode = self.node
	return iterate_on_star(rnode, inner_pnode)
end

--- Iterates over the results of the "+" path of this node, from `rnode`.
-- @param rnode the result node to start from
-- @return an iterator over result nodes
function PlusNode:iterate(rnode)
	local inner_pnode = self.node
	return iterate_on_plus(rnode, inner_pnode)
end

--- Iterates over the results of the bounded repetition of this node, from `rnode`.
-- @param rnode the result node to start from
-- @return an iterator over result nodes
function BetweenNode:iterate(rnode)
	local inner_pnode = self.node
	local lower, upper = self.min, self.max
	return iterate_on_nodes_beetween(rnode, inner_pnode, lower, upper)
end

--- Iterates over the results of the optional path of this node, from `rnode`.
-- @param rnode the result node to start from
-- @return an iterator over result nodes
function MaybeNode:iterate(rnode)
	local inner_pnode = self.node
	return iterate_maybe(rnode, inner_pnode)
end

--- Returns an iterator on the result set of a path from a specific node.
-- @param start_node the starting point of the path ; mandatory
-- @param ppath either a compiled path (a table with an `iterate` method) or
--        a value compiled with `path.PropertyPath:new`, typically a path string
-- @return an iterator over result nodes
-- Raises an error when `start_node` is nil.
function path.iterate(start_node, ppath)
	if start_node == nil then error("the start node is mandatory to get result on a path, it is nil") end

	local compiled = ppath
	if type(compiled) ~= "table" then
		-- not compiled yet : build the path object first
		compiled = path.PropertyPath:new(ppath)
	end

	return compiled:iterate(start_node)
end

--- Tells whether a path from `start_node` ends on a given value.
-- @param start_node the starting point of the path
-- @param ppath a path string or a compiled path
-- @param value the value compared with the `item_value()` of each result
--        (currently only works if "value" is a Qid string)
-- @return true if one result matches, false otherwise
function path.matches(start_node, ppath, value)
	local found = false

	for rnode in path.iterate(start_node, ppath) do
		if rnode:item_value() == value then
			found = true
			break
		end
	end

	return found
end

----------------------------

--[[
p.test("Q5", "P279")
p.test(mw.wikibase.getEntity("Q5"),  "P279")
for x in p.iterate(mw.wikibase.getEntity("Q5"), "P279") do p.test(x,  "P279") end -- test if we can continue iteration of an RNode object
Complex test : 
p.test("Q27929033","P1552>!()/P31") => OK
p.test("Q27929033","subclass of/P1552>!()/P31") => NOK

--]]
--- Debugging helper : logs the item value of every result of a path.
-- @param start_point the starting point of the path
-- @param ppath a path string or a compiled path
function path.test(start_point, ppath)
	for rnode in path.iterate(start_point, ppath) do
		-- marker line logged once per result
		mw.log("woot")
		if rnode then mw.log(rnode:item_value()) end
	end
end

-----------------
-- Functions returning a statement or a qualifier snak selected by a property path.
-- Used for sorting : they return a sort key for a statement chosen by a path
-- or a set of paths.

--- Creates a function returning the first result of a path from a claim.
-- @param path the property path, compiled with `PropertyPath:new`
-- @return a function taking a claim and returning the first result of the
--         path started from that claim (nil when there is none)
function path.snak_key_by_path(path)
	-- The parameter shadows the module table `path`, so the class has to be
	-- reached through the local `PropertyPath` : `path.PropertyPath` would
	-- index the argument itself and be nil.
	local path_to_key = PropertyPath:new(path)
	return function(claim)
		-- the parentheses keep only the first value returned by the iterator
		return (path_to_key:iterate(claim)())
	end
end

-- Takes several property paths and creates a function that returns
-- the first value of the first path with a match.

-- example : local get_key = wd.snak_key_by_paths{">P80","P800|P801"}
--           get_key(claim)
-- returns the qualifier value of P80 of the claim if it exists ; if not, the main statement value
-- of P800 of the main value of the « claim » statement ; if not, the P801 one
-- (used in Module:Infobox/fonctions/personne)

-- Note on the example : TODO : it would be equivalent to the single path ">P80|P800|P801", but this is not possible yet

function path.snak_key_by_paths(paths)
	-- every path is compiled once, when the key function is created
	local compiled_paths = {}
	for _, pat in ipairs(paths) do
		compiled_paths[#compiled_paths + 1] = path.PropertyPath:new(pat)
	end

	return function(claim)
		-- returns the first value of the first matching path starting from « claim »
		for _, compiled in ipairs(compiled_paths) do
			local res = compiled:iterate(claim)()
			if res then return res end
		end
	end
end


----------------------------

path.PropertyPath = PropertyPath

return path