Module:DPLlua

From the RuneScape Wiki, the wiki for all things RuneScape
Jump to navigation Jump to search
Module documentation
This documentation is transcluded from Module:DPLlua/doc. [edit] [history] [purge]
Module:DPLlua requires Module:LibraryUtil.
Module:DPLlua requires Module:Paramtest.
Function list
L 13 — split
L 33 — trim
L 37 — mergeItem
L 57 — escape
L 69 — unEscape
L 73 — fixCurlyBrackets
L 79 — removeFormattingSettings
L 116 — formatInclude
L 156 — formatDpl
L 249 — toTable
L 360 — dpl.ask

Uses Template:DPLlua helper to make it possible to include all parameters of a template while maintaining good performance.

This module is a helper module to be used by other modules; it is not designed to be invoked directly. See RuneScape:Lua/Helper modules for a full list and more information. For a full list of modules using this helper, click here.

Function | Type | Use | Example
ask( ... ) | tables | ask takes a series of tables, each containing the settings for a DPL query; it will return the same number of result tables as input tables. All formatting settings are stripped from the config. If the config does not contain include, the result will be a simple list of page names.
{
	<pagename#1>,
	<pagename#2>,
	<pagename#3>,
}

A query with an include of the form include = '{template#1}:1:2:param1:param2, {template#2}:3:param1, %0' will give a result like

{
	['include'] = {
		['template#1'] = {
			[1] = val#1,
			[2] = val#2,
			['param1'] = val#3,
			['param2'] = val#4,
		},
		['template#2'] = {
			[3] = val#5,
			['param1'] = val#6,
		},
		['%0'] = val#7
	},
	['title'] = <pagename>
}

You can also do include = '{some template}' which will include all parameters of that template in their unexpanded form (templates are not expanded but some content in parser tags is placed in strip markers).

If a template appears multiple times on the same page then that page will appear multiple times in the returned list, where each appearance corresponds to a different occurrence of the template on that page. This behaviour can be changed by adding groupMultiTemplateResults = true to the dpl config; then, for example, val#1 of the above example will be a table like {val#1 of template 1, val#1 of template 2, ...}, etc.

If the config value count is larger than 500 it will automatically generate multiple DPL queries with offsets and their outputs will be combined in the returned result.

If the DPL throws an error it will be available in the error field of the output.

Differences with normal DPL:

  • All formatting options are ignored
  • Using include = '{template}' will include all the arguments of that template instead of expanding the template
  • The parameter count can go higher than 500
  • A new option groupMultiTemplateResults is added.
  • When the value of a parameter is a table it will be expanded into multiple lines. E.g. doing notcategory = {val#1, val#2} will expand into
|notcategory = val#1
|notcategory = val#2
Note: if you include a whole template (e.g. include = '{some template}'), content inside strip markers (other than nowiki) can't be cleaned up inside Lua, so pipe characters (|) will be replaced with § characters and the { and } characters will be replaced by ❴ (U+2774) and ❵ (U+2775). Use include = '{some template}, {some template}:1:2:3' instead for the problem parameters.

-- <nowiki>
local dpl = {}
local libraryUtil = require( 'libraryUtil' )
local hasContent = require( 'Module:Paramtest' ).has_content
local checkType = libraryUtil.checkType
local checkTypeForNamedArg = libraryUtil.checkTypeForNamedArg

-- Broken-bar character exposed on the module table. formatDpl substitutes this same character
-- for '|' in parameter values before they are placed inside the {{#dpl:}} call.
-- NOTE(review): dpl.pipe is not referenced elsewhere in this file; presumably it is meant for
-- callers that need to write a pipe inside a query value - confirm.
dpl.pipe = '¦'
-- Marker that formatDpl places around every included value (via tablerow/secseparators) so
-- toTable can locate that content in the DPL output and escape it before splitting.
local dataContentMarker = '`#@@#`'
-- Queue shared by formatDpl and toTable: formatDpl appends one list of included parameter
-- descriptors per query, toTable removes the first entry each time it parses a result.
local allIncludedParamNames = {}

-- Custom function for splitting a string because mw.text.split() is way too slow.
--
-- @param str      the string to split
-- @param pattern  the separator: a Lua pattern, or a literal string when `plain` is true
-- @param plain    forwarded to string.find; a true value turns pattern matching off
-- @return         array with the pieces of `str` found between separators; it always holds
--                 at least one element (the whole string when the separator never matches)
--
-- A separator that matches the empty string (e.g. '' or 'x*') splits the text into single
-- bytes at that position instead of looping forever.
local function split( str, pattern, plain )
	local res = {}
	local len = string.len( str )
	local startIndex = 1

	while startIndex do
		local i, j = string.find( str, pattern, startIndex, plain )
		if not i then
			-- No further separator: the remainder is the last piece
			res[#res + 1] = string.sub( str, startIndex )
			startIndex = nil
		elseif j < i then
			-- Empty match: string.find reports j == i - 1, so `j + 1` would not advance.
			-- Emit everything up to and including byte i and continue after it.
			res[#res + 1] = string.sub( str, startIndex, i )
			startIndex = i < len and i + 1 or nil
		else
			res[#res + 1] = string.sub( str, startIndex, i - 1 )
			startIndex = j + 1
		end
	end

	return res
end

-- Hand-rolled (for speed) whitespace trim.
-- @param str  string to trim
-- @return     `str` without leading and trailing whitespace (%s)
local function trim( str )
	local withoutLeading = string.gsub( str, '^%s+', '' )
	local trimmed = string.gsub( withoutLeading, '%s+$', '' )
	return trimmed
end

-- Folds `item` into the value already stored at tbl[key].
--   * stored value is not a table          -> it is replaced by the pair { stored, item }
--   * stored value is a table, item is not -> item is appended to the stored table
--   * both are tables                      -> every key of the stored table is merged
--                                             recursively with the same key of `item`
--                                             (keys that only exist in `item` are not visited)
local function mergeItem( tbl, key, item )
	local current = tbl[key]

	if type( current ) ~= 'table' then
		tbl[key] = { current, item }
		return
	end

	if type( item ) ~= 'table' then
		table.insert( current, item )
		return
	end

	for k in pairs( current ) do
		mergeItem( current, k, item[k] )
	end
end

-- Lookup used by escape(): each character is swapped for a numeric character reference.
-- Note that '-' is stored as &#8208;, which unEscape maps back to '-'.
local escapeChars = {
	['|'] = '&#124;',
	['-'] = '&#8208;',
	['{'] = '&#123;',
	['}'] = '&#125;',
	['['] = '&#91;',
	[']'] = '&#93;'
}

-- Returns `str` with every {, }, [, ], | and - replaced according to escapeChars.
local function escape( str )
	local escaped = string.gsub( str, '[{}%[%]|%-]', escapeChars )
	return escaped
end

-- Lookup used by unEscape(): the inverse of the mapping applied by escape().
local unEscapeChars = {
	['&#124;'] = '|',
	['&#8208;'] = '-',
	['&#123;'] = '{',
	['&#125;'] = '}',
	['&#91;'] = '[',
	['&#93;'] = ']'
}

-- Returns `str` with the numeric character references listed in unEscapeChars turned back
-- into their characters. Any other &#<digits>; sequence is left untouched.
local function unEscape( str )
	local restored = string.gsub( str, '&#%d+;', unEscapeChars )
	return restored
end

-- Turns ❴ (U+2774) and ❵ (U+2775) back into { and }.
local function fixCurlyBrackets( str )
	-- Both characters are 3 bytes long in UTF-8 and share the first two bytes (\226\157), so
	-- they can't be put inside a [] pattern directly; instead the pattern matches the shared
	-- prefix followed by either final byte (\180 or \181). Ustring would fix this but it's
	-- way too slow.
	local curlyReplacements = { ['❴'] = '{', ['❵'] = '}' }
	local fixed = string.gsub( str, '\226\157[\180\181]', curlyReplacements )
	return fixed
end

-- Keys that are cleared from a query config before the DPL call is generated.
local formattingSettingNames = {
	'mode',
	'table',
	'tablerow',
	'tablesortcol',
	'headingmode',
	'headingcount',
	'listattr',
	'itemattr',
	'hlistattr',
	'hitemattr',
	'userdateformat',
	'shownamespace',
	'escapelinks',
	'titlemaxlength',
	'replaceintitle',
	'columns',
	'rows',
	'rowsize',
	'rowcolformat',
	'resultsheader',
	'resultsfooter',
	'oneresultheader',
	'oneresultfooter',
	'noresultsheader',
	'suppresserrors',
	'noresultsfooter',
	'format',
	'groupMultiTemplateResults'
}

-- Removes every key listed in formattingSettingNames from `query`, in place.
-- @param query  table with the settings of one DPL query
local function removeFormattingSettings( query )
	for index = 1, #formattingSettingNames do
		query[ formattingSettingNames[index] ] = nil
	end
end

-- Rewrites the value of the `include` setting into the form that is sent to DPL and records
-- what was included.
--
-- @param query  the include string, e.g. '{Tpl}:1:name, {Other}, %0'; a non-string raises an
--               error through checkTypeForNamedArg
-- @return       1. the rewritten, comma separated include string
--               2. array of descriptors, one per value DPL is asked to return. Fields:
--                  name, isTemplate, and one of param / includeAll / hasPhantomTemplate
--               3. array with one { hasPhantomTemplate = boolean } entry per include item
local function formatInclude( query )
	checkTypeForNamedArg( 'Module:DPLlua.ask', 'include', query, 'string' )
	query = split( query, ',', true )
	local includedParamNames = {}
	local sectionAttributes = {}

	for i = 1, #query do
		if query[i]:match( '%b{}' ) then -- Check if we are including a template
			-- Lua patterns work on bytes and ¦ is 2 bytes long (\194\166), so it can't be used
			-- inside a [] set: only its first byte would be consumed (leaving \166 at the start
			-- of `extra`) and template names containing one of those bytes would be cut short.
			-- Normalise ¦ to | first; formatDpl turns every | back into ¦ afterwards.
			local item = query[i]:gsub( '¦', '|' )
			local templateName, extra = item:match( '{(.-)[|}](.*)' )
			if hasContent( extra ) then
				-- `extra` is either 'phantom}...' (from {tpl|phantom}), a '.suffix' or '/suffix'
				-- (from {tpl}.suffix), or a ':param:param' list (from {tpl}:param:param)
				local phantomTemplateName = extra:match( '^(.-)}' ) or extra:match( '^[./].+' )
				local phantomTemplatePrefix = extra:match( '^(.-)}' ) and '' or templateName
				local params = extra:gsub( '^.-}', '' ):gsub( '^[./].+', '' ):gsub( ':%-', '' )
				local sur = hasContent( phantomTemplateName ) and ('¦' .. phantomTemplatePrefix .. phantomTemplateName) or ''
				query[i] = string.format( '{%s%s}%s', templateName, sur, params )

				if hasContent( phantomTemplateName ) then
					table.insert( includedParamNames, { name=phantomTemplatePrefix..phantomTemplateName, isTemplate=true, hasPhantomTemplate=true } )
					table.insert( sectionAttributes, { hasPhantomTemplate=true } )
				else
					-- One descriptor per ':param' because each of them yields its own value
					for param in params:gmatch( ':([^:]*)' ) do
						param = trim( param )
						table.insert( includedParamNames, { name=templateName, isTemplate=true, param=param } )
					end
					table.insert( sectionAttributes, { hasPhantomTemplate=false } )
				end
			else
				query[i] = string.format( '{%s¦DPLlua helper}', templateName ) -- Use a helper template to get all the parameters of our included template
				table.insert( includedParamNames, { name=templateName, isTemplate=true, includeAll=true } )
				table.insert( sectionAttributes, { hasPhantomTemplate=false } )
			end
		else
			-- Not a template include (e.g. '%0'): passed to DPL unchanged
			table.insert( includedParamNames, { name=trim( query[i] ) } )
			table.insert( sectionAttributes, { hasPhantomTemplate=false } )
		end
	end

	return table.concat( query, ',' ), includedParamNames, sectionAttributes
end

-- Builds the wikitext of the {{#dpl:}} call(s) for one query config.
--
-- @param query  table with DPL settings. `count` (default 500) and `offset` (default 0) are
--               read and then cleared from the table; every other key becomes a
--               `|key=value` line (one line per element when the value is a table).
-- @return       string with one {{#dpl:}} call per started block of 500 results, concatenated
--
-- Side effect: appends the list of included parameter descriptors (empty when the config has
-- no `include`) to allIncludedParamNames, where toTable picks it up.
local function formatDpl( query )
	local queries = {}
	local count = query.count or 500
	local offset = query.offset or 0
	local usesInclude = false
	local includedParamNames = {}
	local sectionAttributes
	query.count = nil
	query.offset = nil

	-- Use table format so we can place dataContentMarkers around each included parameter. The secseparator
	-- is needed to add dataContentMarkers when a phantom template is used.
	-- noresultsheader=@@ makes an empty result recognisable: toTable checks for a leading @@.
	local dplStringInclude =
[=[
{{#dpl:
|noresultsheader=@@
|count=%s
|offset=%s
|%s
|table=,
|listseparators=,\n¦-\n¦[[%%PAGE%%¦]],,
|tablerow=%s
|secseparators=%s
}}]=]

	-- Table format requires an include statement so we use format instead.
	-- This is also a lot faster than adding an empty include statement
	local dplStringNoInclude =
[=[
{{#dpl:
|noresultsheader=@@
|count=%s
|offset=%s
|%s
|format=,¦-¦[[%%PAGE%%¦]],,
}}]=]

	-- The parameter lines are the same for every generated call, so build them once
	local params = {}
	for k, v in pairs( query ) do
		if k == 'include' then
			v, includedParamNames, sectionAttributes = formatInclude( v )
			usesInclude = true
		end

		-- A literal | would end the parameter inside {{#dpl:}}, so it is sent as ¦
		if type( v ) == 'table' then
			for _, x in ipairs( v ) do
				table.insert( params, k .. '=' .. tostring( x ):gsub( '|', '¦' ) )
			end
		else
			table.insert( params, k .. '=' .. tostring( v ):gsub( '|', '¦' ) )
		end
	end
	local paramLines = table.concat( params, '\n|' )

	local tablerow, secseparators
	if usesInclude then
		-- Wrap every included value in dataContentMarkers
		tablerow = string.rep( dataContentMarker..'%%'..dataContentMarker..',', #includedParamNames )

		secseparators = ''
		for _, v in ipairs( sectionAttributes ) do
			if v.hasPhantomTemplate then
				-- Phantom templates need this because they ignore tablerow formatting
				secseparators = secseparators .. '¶¦' .. dataContentMarker .. ',' .. dataContentMarker .. ','
			else
				secseparators = secseparators .. '¶¦,,'
			end
		end
	end

	-- Auto generate more than one dpl if count > 500
	-- The results of these are later combined
	for _ = 1, math.ceil( count / 500 ) do
		local batchSize = count > 500 and 500 or count

		if usesInclude then
			table.insert( queries, string.format(
				dplStringInclude,
				batchSize,
				offset,
				paramLines,
				tablerow,
				secseparators
			) )
		else
			table.insert( queries, string.format(
				dplStringNoInclude,
				batchSize,
				offset,
				paramLines
			) )
		end

		count = count - 500
		offset = offset + 500
	end

	table.insert( allIncludedParamNames, includedParamNames )
	return table.concat( queries )
end

-- Parses the preprocessed output of the DPL call(s) generated by formatDpl for one query.
--
-- @param query                      string with the DPL output of one query
-- @param groupMultiTemplateResults  when truthy, all values found for a page are collected in
--                                   a single entry (repeated values are merged with mergeItem)
--                                   instead of one entry per group of included values
-- @return  table `res`:
--            * without include: array of page titles
--            * with include:    array of { title = <page>, include = <table of values> }
--          `res.error` holds the DPL error paragraph when the output contained one.
--
-- Side effect: removes the first entry of allIncludedParamNames (the one formatDpl queued).
local function toTable( query, groupMultiTemplateResults )
	local includedParamNames = table.remove( allIncludedParamNames, 1 )
	local usesInclude = #includedParamNames > 0
	local res = {}

	-- Move a DPL error message out of the text and into res.error
	query = query:gsub( '<p>Extension:DynamicPageList .-</p>', function(item) res.error = item; return '' end )

	if query:find( '^@@' ) then -- @@ is used when no result is found
		return res
	end

	if usesInclude then
		-- Escape {, }, [, ], | and - inside every marked value (the markers themselves are
		-- dropped), so the splits on '|-' and '|' below only hit the separators
		query = query:gsub( dataContentMarker..'(.-)'..dataContentMarker, escape )
	end

	query = trim( query )
	query = split( query, '|-', true ) -- Results of the returned pages are separated by |-

	for _, v in ipairs( query ) do
		if hasContent( v ) and not v:find( '^@@' ) then
			v = trim( v )
			local title = v:match( '^|%[%[(.-)|' )
			local rawDataList = v:match( '^|.-|.-|(.*)' ) -- This is everything after the title

			if not usesInclude then
				if title and title ~= '' then
					table.insert( res, title )
				end
			else
				-- When multiple includes are used (e.g. include={Template1},{Template2} or include={Template}:1:2) their results are separated by a pipe
				rawDataList = split( rawDataList, '|', true )
				local cleanedDataList = {}

				for _incIndex, dataItem in ipairs( rawDataList ) do
					local incIndex = ((_incIndex - 1) % #includedParamNames) + 1 -- Needed in case the same template appears multiple times on the same page
					dataItem = unEscape( dataItem )
					dataItem = trim( dataItem )

					if includedParamNames[ incIndex ].isTemplate and includedParamNames[ incIndex ].includeAll then -- Check if we included a full template
						-- When we include an entire template we use the %ARGS% parameter supplied by dpl.
						-- However all | characters are replaced with §, e.g.:
						-- §namelessParam
						-- §param = text [[wowee§link text]]
						-- §param2 = text {{something§something else}}
						-- Wrap nowiki strip markers in <nowiki> tags so they can be found again after unstripping
						dataItem = dataItem:gsub( '\127\'"`UNIQ%-%-nowiki%-%x+%-QINU`"\'\127', function(item) return '<nowiki>' .. item .. '</nowiki>' end )
						dataItem = mw.text.unstripNoWiki( dataItem ) -- Unstrip nowiki so we can clean their content
						dataItem = fixCurlyBrackets( dataItem ) -- When using the %ARGS% dpl parameter, curly brackets are replaced with ❴ (U+2774) and ❵ (U+2775)
						dataItem = dataItem:gsub( '%b{}', function(x) return x:gsub( '§', '|' ) end ) -- Restore pipe characters inside links and templates
						dataItem = dataItem:gsub( '%b[]', function(x) return x:gsub( '§', '|' ) end )
						dataItem = dataItem:gsub( '<nowiki>(.-)</nowiki>', function(x) return mw.getCurrentFrame():extensionTag( 'nowiki', x ) end ) -- Restrip nowiki
						-- Collects the template arguments: named ones by name, unnamed ones in order
						local _dataItem = {}

						if dataItem ~= '' then
							dataItem = split( dataItem:sub( 3 ), '§' ) -- The sub(3) removes the first § at the start. § is 2 bytes wide so start at index 3

							for i, item in ipairs( dataItem ) do
								if item:find( '=' ) then -- Check if the parameter is named or unnamed
									local param, value = item:match( '^%s*(.-)%s*=%s*(.-)%s*$' )
									_dataItem[ param ] = value
								else
									table.insert( _dataItem, trim( item ) )
								end
							end
						end

						dataItem = _dataItem
					end

					-- Without grouping, every full round of #includedParamNames values gets its own entry
					local dataListIndex = groupMultiTemplateResults and 1 or math.ceil( _incIndex / #includedParamNames )
					if
						includedParamNames[ incIndex ].isTemplate and
						not includedParamNames[ incIndex ].includeAll and
						not includedParamNames[ incIndex ].hasPhantomTemplate
					then -- This means there was an include in the form 'include = {template}:param'
						local templateName = includedParamNames[ incIndex ].name
						local paramName = includedParamNames[ incIndex ].param
						paramName = tonumber( paramName ) or paramName -- Keep as string if tonumber fails
						cleanedDataList[ dataListIndex ] = cleanedDataList[ dataListIndex ] or {}
						cleanedDataList[ dataListIndex ][ templateName ] = cleanedDataList[ dataListIndex ][ templateName ] or {}
						
						-- Values past the first round belong to a repeated occurrence: merge instead of overwrite
						if groupMultiTemplateResults and _incIndex > #includedParamNames then
							mergeItem( cleanedDataList[ dataListIndex ][ templateName ], paramName, dataItem )
						else
							cleanedDataList[ dataListIndex ][ templateName ][ paramName ] = dataItem
						end
					else
						-- Whole template, phantom template or non-template include: stored directly under its name
						local templateName = includedParamNames[ incIndex ].name
						cleanedDataList[ dataListIndex ] = cleanedDataList[ dataListIndex ] or {}
						
						if groupMultiTemplateResults and _incIndex > #includedParamNames then
							mergeItem( cleanedDataList[ dataListIndex ], templateName, dataItem )
						else
							cleanedDataList[ dataListIndex ][ templateName ] = dataItem
						end
					end
				end

				-- Rows without a recognisable title are dropped
				if title and title ~= '' then
					for _, v in ipairs( cleanedDataList ) do
						table.insert( res, { title=title, include=v } )
					end
				end
			end
		end
	end

	return res
end

-- Accepts a series of tables each containing the settings for a dpl query.
-- Combining multiple dpl queries yields better performance than doing them sequentially.
--
-- @param ...  one or more config tables; a non-table argument raises an error via checkType.
--             The tables are not modified.
-- @return     one result table per config (see toTable for their layout). Each result also
--             carries the fields 'DPL time' and 'Parse time' (both taken from os.clock()).
function dpl.ask( ... )
	-- Put between the generated DPL calls so that the single preprocessed output can be cut
	-- back into one piece per query. Any text that can't occur in DPL output would do.
	local querySeparator = '$@µ@$'

	local formatTime = os.clock()
	local queries = { ... }
	local wantsGrouping = {}

	for i = 1, #queries do
		checkType( 'Module:DPLlua.ask', i, queries[i], 'table' )

		-- removeFormattingSettings and formatDpl clear keys from the table they are given.
		-- Work on a shallow copy so the caller's config stays intact and can be reused.
		local config = {}
		for k, v in pairs( queries[i] ) do
			config[k] = v
		end

		table.insert( wantsGrouping, config.groupMultiTemplateResults or false )
		removeFormattingSettings( config )
		queries[i] = formatDpl( config )
	end
	formatTime = os.clock() - formatTime

	-- Run all DPL calls in a single preprocess pass
	local DPLtime = os.clock()
	queries = table.concat( queries, querySeparator )
	queries = mw.getCurrentFrame():preprocess( queries )
	queries = split( queries, querySeparator, true )
	DPLtime = os.clock() - DPLtime

	for i = 1, #queries do
		local parseTime = os.clock()
		queries[i] = toTable( queries[i], wantsGrouping[i] )
		parseTime = os.clock() - parseTime
		queries[i]['DPL time'] = DPLtime
		queries[i]['Parse time'] = math.floor( (formatTime + parseTime) * 1e5 ) / 1e5 -- os.clock() has a resolution of 10µs
	end

	return unpack( queries )
end

-- function dpl.test()
-- 	local time = os.clock()

	-- local a, b = dpl.ask({
	--     namespace = 'Module',
	--     linksto = 'Module:Chart data',
	--     distinct = 'strict',
	--     ordermethod = 'title',
	--     nottitlematch = '%/doc¦%sandbox%¦Exchange/%¦Exchange historical/%¦Chart data',
	-- 	ignorecase = 'true',
	-- 	allowcachedresults = false
	-- },{
	--     namespace = 'Module',
	--     linksto = 'Module:Enum',
	--     distinct = 'strict',
	--     ordermethod = 'title',
	-- 	nottitlematch = '%/doc¦%sandbox%¦Exchange/%¦Exchange historical/%¦Enum',
	--     ignorecase = 'true',
	-- 	allowcachedresults = false
	-- })
	-- mw.logObject(a)
	-- mw.logObject(b)

	-- local a, b = dpl.ask({
	--     namespace = 'Module',
	--     linksto = 'Module:Chart data',
	--     distinct = 'strict',
	--     ordermethod = 'title',
	--     nottitlematch = '%/doc¦%sandbox%¦Exchange/%¦Exchange historical/%¦Chart data',
	--     ignorecase = 'true',
	-- 	allowcachedresults = false
	-- },{
	-- 	namespace = '',
	-- 	ignorecase = 'true',
	-- 	uses = 'Template:Databox Recipe',
	-- 	count = 50,
	-- 	include = '{Databox Recipe},{Infobox Item}',
	-- 	allowcachedresults = false
	-- })
	-- mw.logObject(a)
	-- mw.logObject(b)

	-- local a = dpl.ask{
	-- 	namespace = '',
	-- 	uses = 'Template:Databox Recipe',
	-- 	include = '{Databox Recipe}:skill:name,{Infobox Item}:update,{Infobox Item|test}',
	-- 	count = 50,
	-- 	ordermethod = 'title',
	-- }
	-- mw.logObject(a)

	-- local q = dpl.ask{
	-- 	uses = "Template:Collections table",
	-- 	category = "Archaeology collections",
	-- 	-- include = "{Infobox Collection}:reward,{Collections table}:1:2:3:4:5:6:7:8:9:10:11:12:13:14:15",
	-- 	include = "{Infobox Collection}:reward,{Collections table}",
	-- 	count = 100
	-- }
	-- mw.logObject(q)
	
	-- local q = dpl.ask{
	-- 	namespace = "",
	-- 	uses = "Template:Infobox Spell",
	-- 	notcategory = {"Removed content", "Removed spells"},
	-- 	nottitlematch = {"Enchant Crossbow Bolt", "Storm of Armadyl"},
	-- 	include = "{Infobox Spell}",
	-- }
	-- mw.logObject(q)

-- 	local list = dpl.ask{
-- 		namespace = 'Template',
-- 		uses = 'Template:Navbox',
-- 		ordermethod = 'title',
-- 		include = '{Navbox}:gtitle1:gtitle2',
-- 		count = 1,
-- 		offset = 3
-- 	}
-- 	mw.logObject(list)

-- 	local list = dpl.ask{
-- 		namespace = 'User',
-- 		titlematch = 'CephHunter/Sandbox/test1',
-- 		include = '{User:CephHunter/Sandbox/test2|User:CephHunter/Sandbox/test3},{User:CephHunter/Sandbox/test3}:1',
-- 	}
-- 	mw.logObject(list)

-- 	mw.logObject(dpl.ask{
-- 		namespace = 'User',
-- 		ignorecase = 'true',
-- 		titlematch = 'CephHunter/Sandbox/test1',
-- 		include = '{User:CephHunter/Sandbox/test2}'
-- 	})

-- 	mw.logObject(dpl.ask{
-- 		namespace = 'Module',
-- 		uses = 'Template:Helper module',
-- 		titlematch = '%/doc',
-- 		nottitlematch = 'Exchange/%|Exchange historical/%|Sandbox/%',
-- 		ordermethod = 'title',
-- 		include = '{Helper module}, {Helper module}:example',
-- 		count = 1,
-- 		offset = 13
-- 	})

-- 	mw.logObject(dpl.ask{
--         namespace = 'Module',
--         titlematch = 'Chart data|Absorbative calculator',
--         nottitlematch = 'Exchange/%|Exchange historical/%|Sandbox/%|%/doc|DPLlua%',
--         ordermethod = 'title',
--         include = '%0'
-- 	})

-- 	mw.logObject(dpl.ask{
--         uses = 'Template:Collections table',
--         include = '{Collections table}',
--         count = 5
-- 	})

-- 	mw.log(os.clock()-time)
-- end

return dpl
-- </nowiki>