Модуль:WikidataSelectors
Материал из Томской энциклопедии
Для документации этого модуля может быть создана страница Модуль:WikidataSelectors/doc
--[[
	WikidataSelectors: parses selector strings such as
	"P123[rank:preferred][P456:Q789, !Q42]" and filters a table of Wikidata
	statements accordingly.

	Selector grammar, as implemented below:
	  * the selector starts with a property ID (P123, case-insensitive);
	  * it is followed by zero or more bracket groups "[...]";
	  * every group holds one or more comma-separated conditions;
	  * conditions inside one group are merged (union, de-duplicated by
	    statement id), groups are applied one after another (intersection).
]]

local i18n = {
	["errors"] = {
		["rank-not-valid"] = "Некорретное значение приоритета (rank)",
		["cant-parse-condition"] = "Не удалось разобрать условие"
	}
}

local validRanks = {
	'best',
	'preferred',
	'normal',
	'deprecated'
}

-- Key used by applyRules for statements that carry no 'id' field, so that
-- they can still be stored in a lookup table.
local NO_ID = {}

--[[
	Internal function for error message

	Input: key in errors table
	Output: never returns; raises the error message
]]
local function throwError( key )
	error( i18n.errors[key] )
end

--[[
	Internal function: normalise the "inversed" argument to a real boolean.
	The filters compare booleans with ~=, which only behaves as an XOR when
	both operands are booleans.
]]
local function toBoolean( flag )
	return flag and true or false
end

--[[
	Internal function: value of the statement's main snak, or nil when the
	statement has no mainsnak or no datavalue.
]]
local function getMainValue( statement )
	local mainsnak = statement['mainsnak']
	local datavalue = mainsnak and mainsnak['datavalue']
	return datavalue and datavalue['value']
end

--[[
	Internal function: turn one textual condition into a command table.

	Input: condition string (without the leading "!"), inversion flag
	Output: command table { type = ..., value = ..., inversed = ... }
	Raises: 'cant-parse-condition' if no condition syntax matches
]]
local function parseCondition( subRule, isInversed )
	local match = mw.ustring.match

	-- p123[1]
	if match( subRule, '^%d+$' ) then
		return { type = 'position', value = subRule, inversed = isInversed }
	end

	-- p123[rank:preferred]
	local rank = match( subRule, '^rank%s*:%s*(%a+)$' )
	if rank then
		return { type = 'rank', value = rank, inversed = isInversed }
	end

	-- p123[language:xx]
	local language = match( subRule, '^language%s*:%s*([%a%-]+)$' )
	if language then
		return { type = 'language', value = language, inversed = isInversed }
	end

	-- p123[language!:xx]
	language = match( subRule, '^language%s*!:%s*([%a%-]+)$' )
	if language then
		return { type = 'language', value = language, inversed = not isInversed }
	end

	-- p123[unit:q789]
	if match( subRule, '^unit%s*:%s*[^%[%],:]+$' ) then
		return {
			type = 'unit',
			value = match( subRule, ':%s*([^%[%],:]+)$' ),
			inversed = isInversed
		}
	end

	-- p123[unit!:q789]
	if match( subRule, '^unit%s*!:%s*[^%[%],:]+$' ) then
		return {
			type = 'unit',
			value = match( subRule, '!:%s*([^%[%],:]+)$' ),
			inversed = not isInversed
		}
	end

	-- p123[p456]
	if match( subRule, '^[Pp]%d+$' ) then
		return {
			type = 'qualifier',
			qualifier = match( subRule, '^[Pp]%d+' ),
			value = nil,
			inversed = isInversed
		}
	end

	-- p123[p456:q789]
	if match( subRule, '^[Pp]%d+%s*:%s*[^%[%],:]+$' ) then
		return {
			type = 'qualifier',
			qualifier = match( subRule, '^([Pp]%d+)%s*:?' ),
			value = match( subRule, ':%s*([^%[%],:]+)$' ),
			inversed = isInversed
		}
	end

	-- p123[p456!:q789]
	if match( subRule, '^[Pp]%d+%s*!:%s*[^%[%],:]+$' ) then
		return {
			type = 'qualifier',
			qualifier = match( subRule, '^([Pp]%d+)%s*!:?' ),
			value = match( subRule, '!:%s*([^%[%],:]+)$' ),
			inversed = not isInversed
		}
	end

	-- p123[q456]
	if match( subRule, '^[Qq]%d+$' ) then
		return {
			type = 'value',
			value = match( subRule, '^[Qq]%d+' ),
			inversed = isInversed
		}
	end

	throwError( 'cant-parse-condition' )
end

local p = {}

--[[
	Main function for parse selectors and filter statements

	Input: statements table (keyed by upper-case property ID), selector string
	Output: filtered statements table, or nil when allClaims is nil or holds
	        no entry for the selected property
	Raises: 'cant-parse-condition' / 'rank-not-valid' for malformed selectors
]]
function p.filter( allClaims, propertySelector )
	propertySelector = mw.text.trim( propertySelector )

	-- Get property ID from selector
	local propertyId = mw.ustring.match( propertySelector, '^[Pp]%d+' )
	if not propertyId then
		propertyId = ''
	end

	-- The ID is pure ASCII, so its byte length equals its character length.
	local initPos = #propertyId + 1
	propertyId = string.upper( propertyId )

	if ( not allClaims ) then
		return nil
	end
	local allPropertyClaims = allClaims[propertyId]
	if ( not allPropertyClaims ) then
		return nil
	end

	-- Gathering rules
	local rules = p.matchSelectors( propertySelector, initPos )

	-- If there is no rank filter, then default rank is 'best'
	local isRanked = false
	for i, subRules in ipairs( rules ) do
		for j, rule in ipairs( subRules ) do
			if rule['type'] == 'rank' then
				isRanked = true
				break
			end
		end
		if isRanked then
			break
		end
	end
	if not isRanked then
		table.insert( rules, 1, { { type = 'rank', value = 'best' } } )
	end

	-- Execute rules
	allPropertyClaims = p.applyRules( allPropertyClaims, rules )

	return allPropertyClaims
end

--[[
	Match and gather selector rules

	Input: string with selectors rules, start position (in characters,
	       defaults to 1)
	Output: rules table: a list of groups, each group a list of commands
	Raises: 'cant-parse-condition' for a condition that cannot be parsed
]]
function p.matchSelectors( selectorsString, initPos )
	local rules = {}
	local rawRulePattern = '^%s*%[%s*[^%[%]]+%s*%]%s*'
	local rulePattern = '^%s*%[%s*([^%[%]]+)%s*%]%s*$'

	if not initPos then
		initPos = 1
	end

	local rawRule = mw.ustring.match( selectorsString, rawRulePattern, initPos )

	while rawRule do
		-- mw.ustring.match takes its init offset in characters, so advance by
		-- the character count; the byte length (#rawRule) would skip past the
		-- following groups whenever the group contains non-ASCII text.
		initPos = initPos + ( mw.ustring.len( rawRule ) or #rawRule )

		local rule = mw.text.trim( mw.ustring.match( rawRule, rulePattern ) )
		local subRules = mw.text.split( rule, '%s*,%s*' )

		local commands = {}
		for i, subRule in ipairs( subRules ) do
			local isInversed = false
			if mw.ustring.match( subRule, '^!' ) then
				isInversed = true
				subRule = mw.ustring.match( subRule, '^!%s*(.+)$' )
				if not subRule then
					-- A bare "!" has nothing to negate.
					throwError( 'cant-parse-condition' )
				end
			end

			table.insert( commands, parseCondition( subRule, isInversed ) )
		end

		-- 0 is truthy in Lua, so the length must be compared explicitly.
		if #commands > 0 then
			table.insert( rules, commands )
		end

		rawRule = mw.ustring.match( selectorsString, rawRulePattern, initPos )
	end

	return rules
end

--[[
	Intercept statements with selector rules

	Input: statements table, selector rules
	Output: filtered statements table
]]
function p.applyRules( claims, rules )
	for i, subRules in ipairs( rules ) do
		local newClaims = {}
		for j, rule in ipairs( subRules ) do
			local ruleType = rule['type']
			if ruleType == 'rank' then
				table.insert( newClaims, p.filterByRank( claims, rule['value'], rule['inversed'] ) )
			elseif ruleType == 'language' then
				table.insert( newClaims, p.filterByLanguage( claims, rule['value'], rule['inversed'] ) )
			elseif ruleType == 'unit' then
				table.insert( newClaims, p.filterByUnit( claims, rule['value'], rule['inversed'] ) )
			elseif ruleType == 'position' then
				table.insert( newClaims, p.filterByPosition( claims, rule['value'], rule['inversed'] ) )
			elseif ruleType == 'qualifier' then
				table.insert( newClaims, p.filterByQualifier( claims, rule['qualifier'], rule['value'], rule['inversed'] ) )
			elseif ruleType == 'value' then
				table.insert( newClaims, p.filterByValue( claims, rule['value'], rule['inversed'] ) )
			end
		end

		-- Merge the results of all conditions of this group, keeping the first
		-- occurrence of every statement id. Statements without an id are all
		-- treated as the same statement.
		claims = {}
		local seenIds = {}
		for j, newSubClaims in ipairs( newClaims ) do
			for k, newClaim in ipairs( newSubClaims ) do
				local key = newClaim['id']
				if key == nil then
					key = NO_ID
				end
				if not seenIds[key] then
					seenIds[key] = true
					table.insert( claims, newClaim )
				end
			end
		end
	end

	return claims
end

--[[
	Filter statements by rank

	Input: claims table, rank value (defaults to 'best'), inversion
	Output: filtered statements table
	Raises: 'rank-not-valid' if rank is not one of validRanks
]]
function p.filterByRank( claims, rank, inversed )
	inversed = toBoolean( inversed )
	if not rank then
		rank = 'best'
	end

	-- Check if rank value is valid
	local isValidRank = false
	for i, validRank in ipairs( validRanks ) do
		if rank == validRank then
			isValidRank = true
			break
		end
	end
	if not isValidRank then
		throwError( 'rank-not-valid' )
	end

	-- Find the best rank
	if rank == 'best' then
		rank = 'normal' -- default rank (don't use deprecated even if it's no more claims)

		-- If we have at least one preferred rank, mark it as best
		for i, statement in ipairs( claims ) do
			if statement.rank == 'preferred' then
				rank = 'preferred'
				break
			end
		end
	end

	-- ipairs keeps the original statement order in the result.
	local resultClaims = {}
	for i, statement in ipairs( claims ) do
		if ( statement.rank == rank ) ~= inversed then
			table.insert( resultClaims, statement )
		end
	end

	return resultClaims
end

--[[
	Filter statements by language of value

	Input: claims table, language, inversion
	Output: filtered statements table; when nothing passes the filter but a
	        statement with language 'mul' was seen, that statement is returned
]]
function p.filterByLanguage( claims, language, inversed )
	inversed = toBoolean( inversed )

	local resultClaims = {}
	local mulStatement = nil
	for i, statement in ipairs( claims ) do
		local isMatchLanguage = false
		local value = getMainValue( statement )
		if type( value ) == 'table' and value['language'] then
			if value['language'] == language then
				isMatchLanguage = true
			end
			if value['language'] == 'mul' then
				-- The last 'mul' statement seen is the one kept as fallback.
				mulStatement = statement
			end
		end
		if isMatchLanguage ~= inversed then
			table.insert( resultClaims, statement )
		end
	end

	if next( resultClaims ) == nil and mulStatement ~= nil and next( mulStatement ) ~= nil then
		-- if specific language is not found, but there is Q20923490 value
		table.insert( resultClaims, mulStatement )
	end

	return resultClaims
end

--[[
	Filter statements by unit of value

	Input: claims table, unit (entity ID, case-insensitive), inversion
	Output: filtered statements table (at most one statement, see note below)
]]
function p.filterByUnit( claims, unit, inversed )
	inversed = toBoolean( inversed )
	unit = 'http://www.wikidata.org/entity/' .. string.upper( unit )

	local resultClaims = {}
	for i, statement in ipairs( claims ) do
		local value = getMainValue( statement )
		local isMatchUnit = type( value ) == 'table' and value['unit'] == unit
		if isMatchUnit ~= inversed then
			table.insert( resultClaims, statement )
			-- NOTE(review): only the first passing statement is returned.
			-- Kept as is because callers may rely on it; confirm whether all
			-- statements with the unit should be returned instead.
			break
		end
	end

	return resultClaims
end

--[[
	Filter statements by position

	Input: claims table, position (number or numeric string), inversion
	Output: filtered statements table: the statement at that position, or,
	        when inverted, every statement except the one at that position
]]
function p.filterByPosition( claims, position, inversed )
	inversed = toBoolean( inversed )
	local wantedPosition = tonumber( position )

	local resultClaims = {}
	for statementPosition, statement in ipairs( claims ) do
		if ( statementPosition == wantedPosition ) ~= inversed then
			table.insert( resultClaims, statement )
			if not inversed then
				-- Only one statement can sit at the requested position. When
				-- inverted, the scan must go on to collect all the others.
				break
			end
		end
	end

	return resultClaims
end

--[[
	Filter statements by qualifier existence or its value

	Input: claims table, ID of qualifier's property, qualifier's value
	       (nil = only test for existence), inversion
	Output: filtered statements table
]]
function p.filterByQualifier( claims, qualifierId, value, inversed )
	inversed = toBoolean( inversed )
	qualifierId = string.upper( qualifierId )

	-- Entity IDs are compared case-insensitively. The upper-cased form is kept
	-- in its own variable so that non-entity qualifiers are still compared
	-- with the value exactly as given, whatever the order of the statements.
	local entityValue = value
	if type( value ) == 'string' then
		entityValue = string.upper( value )
	end

	local resultClaims = {}
	for i, statement in ipairs( claims ) do
		local qualifiers = statement['qualifiers'] and statement['qualifiers'][qualifierId]
		if qualifiers then
			if value == nil then
				if ( #qualifiers > 0 ) ~= inversed then
					table.insert( resultClaims, statement )
				end
			else
				local isQualifierFound = false
				for j, qualifier in ipairs( qualifiers ) do
					-- Qualifier snaks without a datavalue cannot match a value.
					local datavalue = qualifier['datavalue']
					if datavalue then
						local qualifierValue = datavalue['value']
						local expected = value
						if datavalue['type'] == 'wikibase-entityid' then
							qualifierValue = qualifierValue and qualifierValue.id
							expected = entityValue
						end
						if qualifierValue == expected then
							isQualifierFound = true
							break
						end
					end
				end
				if isQualifierFound ~= inversed then
					table.insert( resultClaims, statement )
				end
			end
		elseif inversed then
			table.insert( resultClaims, statement )
		end
	end

	return resultClaims
end

--[[
	Filter statements by its values

	Input: claims table, value, inversion
	Output: filtered statements table
]]
function p.filterByValue( claims, value, inversed )
	inversed = toBoolean( inversed )

	-- See filterByQualifier: upper-cased copy used for entity IDs only.
	local entityValue = value
	if type( value ) == 'string' then
		entityValue = string.upper( value )
	end

	local resultClaims = {}
	for i, statement in ipairs( claims ) do
		local isMatch = false
		-- Statements without a datavalue never match a value.
		local datavalue = statement['mainsnak'] and statement['mainsnak']['datavalue']
		if datavalue then
			local statementValue = datavalue['value']
			local expected = value
			if datavalue['type'] == 'wikibase-entityid' then
				statementValue = statementValue and statementValue.id
				expected = entityValue
			end
			isMatch = ( statementValue == expected )
		end
		if isMatch ~= inversed then
			table.insert( resultClaims, statement )
		end
	end

	return resultClaims
end

return p