Module:Sandbox/User:CephHunter/luaDPL

From the RuneScape Wiki, the wiki for all things RuneScape
Jump to: navigation, search
Module documentation
This documentation is transcluded from Template:Module sandbox/doc. [edit] [purge]

This module is a sandbox for CephHunter. It can be used to test changes to existing modules, to prototype new modules, or just to experiment with Lua features.

Invocations of this sandbox should be kept in userspace; if the module is intended for use in other namespaces, it should be moved out of the sandbox into a normal module and template.

This default documentation can be overridden by creating the /doc subpage of this module, as normal.

-- <nowiki>
-- Module table; this is the value returned at the end of the file
local dpl = {}

-- Broken-bar character exposed on the module table. The same character appears
-- literally in the query strings built in this file, in positions where a pipe
-- would otherwise go (presumably so callers can do the same - confirm)
dpl.pipe = '¦'
-- Marker wrapped around every included value in the generated tablerow format,
-- so that the included content can be located and escaped when parsing results
local dataContentMarker = '`#@@#`'
-- Queue of booleans: one entry is appended per formatted query (did it use
-- `include`?) and entries are removed from the front when results are parsed
local usesInclude = {}

-- Custom function for splitting a string because mw.text.split() is waaay too slow.
--
-- @param str      string to split
-- @param pattern  separator; a Lua pattern, or a literal string when `plain` is true
-- @param plain    passed straight to string.find as its "plain" flag
-- @return         array of the pieces between separators; always contains at
--                 least one element (the whole string when nothing matches)
--
-- NOTE: `pattern` must not be able to match the empty string. string.find then
-- returns a zero-length match, the start index never advances and the loop
-- does not terminate.
local function split( str, pattern, plain )
	local res = {}
	local startIndex = 1

	while true do
		local i, j = string.find( str, pattern, startIndex, plain )
		if not i then
			break
		end

		table.insert( res, string.sub( str, startIndex, i - 1 ) )
		startIndex = j + 1
	end

	-- Whatever is left after the last separator (possibly an empty string)
	table.insert( res, string.sub( str, startIndex ) )

	return res
end

-- Hand-rolled (for speed) whitespace trimmer: returns `str` without its
-- leading and trailing whitespace
local function trim( str )
	local core = string.match( str, '^%s*(.-)%s*$' )
	return core
end

-- ASCII characters that carry meaning in wikitext/table syntax, mapped to
-- HTML entities so they survive the table parsing done on the dpl output
local escapeChars = {
	['{'] = '&#123;',
	['}'] = '&#125;',
	['['] = '&#91;',
	[']'] = '&#93;',
	['|'] = '&#124;',
	['-'] = '&#8208;',
}
-- Last byte of the UTF-8 encoding of ❴ (U+2774, bytes E2 9D B4 = \226\157\180)
-- and ❵ (U+2775, bytes E2 9D B5 = \226\157\181), mapped to the entity of the
-- ASCII brace they stand for. Wtf dpl...
local escapeBraceBytes = {
	['\180'] = '&#123;',
	['\181'] = '&#125;',
}

-- Replaces wikitext-significant characters in `str` with HTML entities.
-- Returns exactly one value, so it can be used directly as a gsub replacement.
local function escape( str )
	-- Lua patterns work on bytes, so the 3-byte characters ❴ and ❵ cannot be
	-- put inside a [] class. Match the whole byte sequence instead of the
	-- individual bytes; matching single bytes would also mangle every other
	-- UTF-8 character that happens to contain \226, \157, \180 or \181
	-- (e.g. µ, ´, en dashes, curly quotes).
	str = string.gsub( str, '\226\157([\180\181])', escapeBraceBytes )
	-- The entities inserted above contain none of the characters below, so
	-- nothing gets escaped twice
	str = string.gsub( str, '[{}%[%]|%-]', escapeChars )
	return str
end

-- Numeric HTML entities that are turned back into their plain character
local unEscapeChars = {
	['&#91;'] = '[',
	['&#93;'] = ']',
	['&#123;'] = '{',
	['&#124;'] = '|',
	['&#125;'] = '}',
	['&#8208;'] = '-'
}

-- Converts the entities listed in unEscapeChars back to characters. Any other
-- decimal entity has no entry in the table, so gsub leaves it untouched.
local function unEscape( str )
	-- Parentheses drop gsub's second result (the substitution count)
	return ( string.gsub( str, '&#%d+;', unEscapeChars ) )
end

-- Strips every output/formatting related setting from a query table (in place),
-- leaving only the keys that are not in the list below.
local function removeFormattingSettings( query )
	local formattingKeys = {
		-- output mode / table layout
		'mode', 'table', 'tablerow', 'tablesortcol',
		-- headings and list attributes
		'headingmode', 'headingcount',
		'listattr', 'itemattr', 'hlistattr', 'hitemattr',
		-- title / link rendering
		'userdateformat', 'shownamespace', 'escapelinks',
		'titlemaxlength', 'replaceintitle',
		-- column and row layout
		'columns', 'rows', 'rowsize', 'rowcolformat',
		-- headers and footers
		'resultsheader', 'resultsfooter',
		'oneresultheader', 'oneresultfooter',
		'noresultsheader', 'suppresserrors', 'noresultsfooter',
		'format'
	}

	for idx = 1, #formattingKeys do
		query[ formattingKeys[idx] ] = nil
	end
end

-- Rewrites the value of an `include` setting so it can be used in the generated
-- dpl call.
--
-- @param query  the include value: comma separated entries such as
--               '{Template}' or '{Template}:param1:param2'
-- @return       1) the rewritten include string
--               2) a count: the number of comma separated entries plus one
--                  for every ':' found in explicit parameter lists
local function formatInclude( query )
	local entries = split( query, ',', true )
	local count = #entries

	for i = 1, #entries do
		local entry = entries[i]

		if entry:match( '%b{}' ) then -- Check if we are including a template
			-- NOTE(review): assuming this file is UTF-8, '¦' is two bytes and Lua
			-- classes are byte based, so [¦|}] matches either byte of '¦' on its
			-- own; an entry written as '{Template¦phantom}' leaves a stray byte
			-- at the start of `params`. Confirm whether that form must be supported.
			local templateName, params = entry:match( '{(.-)[¦|}]([^,]*)' )

			if params:find( '%S' ) then
				-- NOTE(review): the original had `params:gsub( '^:%-', '' )` here
				-- with its result thrown away. Lua strings are immutable, so the
				-- call did nothing; it was removed without changing behaviour.
				-- If stripping a leading ':-' was intended, assign the result.
				entries[i] = string.format( '{%s}%s', templateName, params )
				for _ in params:gmatch( ':' ) do
					count = count + 1
				end
			else
				-- Use a helper template to get all the parameters of our included template
				entries[i] = string.format( '{%s¦User:CephHunter/luaDPL/helper}', templateName )
			end
		end
	end

	return table.concat( entries, ',' ), count
end

-- Turns one table of dpl settings into the wikitext of one or more {{#dpl:}}
-- calls and records (in `usesInclude`) whether the query used `include`.
--
-- `query.count` (default 500) and `query.offset` (default 0) are read and then
-- removed from the table; numeric strings such as '20' are accepted. Every
-- remaining key/value pair is emitted as a `key=value` dpl parameter.
--
-- @param query  table of dpl settings; modified in place
-- @return       the concatenated wikitext of all generated dpl calls
local function formatDpl( query )
	-- tonumber() so that a count given as a string does not blow up in the
	-- `count > 500` comparison further down
	local count = tonumber( query.count or 500 )
	local offset = tonumber( query.offset or 0 )
	if not count or not offset then
		error( 'dpl: count and offset must be numbers' )
	end
	local _usesInclude = false
	local includeCount = 0
	query.count = nil
	query.offset = nil

	-- We use table format when the include parameter is used to make sure we can
	-- differentiate between the results in case more than one item is included
	local dplStringInclude =
[=[
{{#dpl:
|noresultsheader = @@
|count=%s
|offset=%s
|%s
|table=,
|tablerow=%s
}}]=]

	-- Table format requires an include statement so we use format instead.
	-- This is also a lot faster than adding an empty include statement
	local dplStringNoInclude =
[=[
{{#dpl:
|noresultsheader = @@
|count=%s
|offset=%s
|%s
|format=,¦-¦[[%%PAGE%%¦]],,
}}]=]

	-- The parameter list is the same for every generated dpl call, so build it
	-- once instead of once per 500-result chunk
	local params = {}
	for k, v in pairs( query ) do
		if k == 'include' then
			v, includeCount = formatInclude( v )
			_usesInclude = true
		end
		table.insert( params, k .. '=' .. tostring( v ) )
	end
	local paramString = table.concat( params, '\n|' )

	-- One marker-wrapped '%%' cell per counted include column
	local tableRow = string.rep(
		string.format( '%s%%%%%s,', dataContentMarker, dataContentMarker ),
		includeCount
	)

	-- Auto generate more than one dpl if count > 500
	-- The results of these are later combined
	local queries = {}
	for _ = 1, math.ceil( count / 500 ) do
		local chunkSize = count > 500 and 500 or count

		if _usesInclude then
			table.insert( queries, string.format(
				dplStringInclude, chunkSize, offset, paramString, tableRow
			) )
		else
			table.insert( queries, string.format(
				dplStringNoInclude, chunkSize, offset, paramString
			) )
		end

		count = count - 500
		offset = offset + 500
	end

	table.insert( usesInclude, _usesInclude )

	return table.concat( queries )
end

-- Parses one included value that contains an %ARGS% dump of a template, e.g.
--   §nameLessParam§param = text
-- into a table: named parameters become string keys, unnamed ones are appended
-- in order.
local function parseTemplateArgs( dataItem )
	local args = {}

	for i, item in ipairs( split( dataItem, '§', true ) ) do
		-- skip first item as it is a false empty string created by splitting on § when the string started with a §
		if i ~= 1 then
			if item:find( '=' ) then -- Check if the parameter is named or unnamed
				local param, value = item:match( '^%s*(.-)%s*=%s*(.-)%s*$' )
				args[ param ] = value
			else
				table.insert( args, trim( item ) )
			end
		end
	end

	return args
end

-- Converts the expanded output of the dpl call(s) generated for one query into
-- a Lua table. Consumes the oldest flag in `usesInclude` to know which output
-- format to expect.
--
-- @param query  expanded wikitext produced for one query
-- @return       without include: array of page titles
--               with include:    array of { title = ..., include = { ... } },
--               where each include entry is a string or, for a template
--               argument dump, a table of its parameters
local function toTable( query )
	local _usesInclude = table.remove( usesInclude, 1 )
	local res = {}

	-- Restores the pipes that were replaced by § (gsub count result dropped)
	local function restorePipes( x )
		return ( x:gsub( '§', '|' ) )
	end

	if _usesInclude then
		-- Escape the included content (and drop the markers around it) so it
		-- cannot be confused with the table syntax parsed below
		query = query:gsub( dataContentMarker..'(.-)'..dataContentMarker, escape )
		query = query:gsub( '{|.-|%-', '') -- Remove the header of the table
		-- Replace the footer of the table with a row indicator. This effectively
		-- combines the output of multiple dpl queries when count > 500
		query = query:gsub( '|}', '|-' )
	end

	query = trim( query )

	-- Results of the returned pages are separated by |-
	for _, v in ipairs( split( query, '|-', true ) ) do
		if v ~= '@@' and v:match( '%S' ) then -- @@ is used when no result is found
			v = trim( v )
			local title = v:match( '^|%[%[(.-)|' )

			-- Chunks without a title never produce a result, so skip them before
			-- looking at their data. This also keeps text that is not a result
			-- row from reaching split() with a nil data list and raising an error.
			if title and title ~= '' then
				if not _usesInclude then
					table.insert( res, title )
				else
					local dataList = v:match( '^|.-|.-|(.*)' ) -- This is everything after the title
					local _dataList = {}

					-- When multiple includes are used (e.g. include={Template1},{Template2}) its results are separated by a pipe.
					-- A row without any data cell yields an empty include list.
					for _, dataItem in ipairs( dataList and split( dataList, '|', true ) or {} ) do
						dataItem = unEscape( dataItem )
						-- When we include an entire template we use the %ARGS% parameter supplied by dpl.
						-- However all | characters are replaced with §, e.g.:
						-- §nameLessParam
						-- §param = text [[wowee§link text]]
						-- §param2 = text {{something§something else}}
						dataItem = dataItem:gsub( '%b{}', restorePipes ) -- Restore pipe characters inside links and templates
						dataItem = dataItem:gsub( '%b[]', restorePipes )
						dataItem = trim( dataItem )

						if dataItem:match( '§' ) then -- Check if we included a template
							dataItem = parseTemplateArgs( dataItem )
						end

						table.insert( _dataList, dataItem )
					end

					table.insert( res, { title=title, include=_dataList } )
				end
			end
		end
	end

	return res
end

-- Accepts a series of tables, each containing the settings for one dpl query,
-- and returns one result table per query (in the same order). Each result
-- table also gets a `time` field: the os.clock() time spent in the single
-- preprocess call shared by all queries.
-- Combining multiple dpl queries yields better performance than doing them sequentially
function dpl.ask( ... )
	-- Text placed between the wikitext of consecutive queries so the expanded
	-- output can be cut apart again afterwards.
	-- NOTE(review): the '[email&#160;protected]' part looks like an e-mail
	-- obfuscation artifact from wherever this file was copied - confirm the
	-- literal against the canonical module source.
	local separator = '[email protected]µ@$'
	local formatted = { ... }

	for idx = 1, #formatted do
		local settings = formatted[idx]
		removeFormattingSettings( settings )
		formatted[idx] = formatDpl( settings )
	end

	local wikitext = table.concat( formatted, separator )

	local started = os.clock()
	local expanded = mw.getCurrentFrame():preprocess( wikitext )
	local elapsed = os.clock() - started

	local results = split( expanded, separator, true )

	for idx = 1, #results do
		local parsed = toTable( results[idx] )
		parsed.time = elapsed
		results[idx] = parsed
	end

	return unpack( results )
end

-- function dpl.test()
-- 	local time = os.clock()

-- 	-- local a, b = dpl.ask({
-- 	--     namespace = 'Module',
-- 	--     linksto = 'Module:Chart data',
-- 	--     distinct = 'strict',
-- 	--     ordermethod = 'title',
-- 	--     nottitlematch = '%/doc¦%sandbox%¦Exchange/%¦Exchange historical/%¦Chart data',
-- 	-- 	ignorecase = 'true',
-- 	-- 	allowcachedresults = false
-- 	-- },{
-- 	--     namespace = 'Module',
-- 	--     linksto = 'Module:Enum',
-- 	--     distinct = 'strict',
-- 	--     ordermethod = 'title',
-- 	-- 	nottitlematch = '%/doc¦%sandbox%¦Exchange/%¦Exchange historical/%¦Enum',
-- 	--     ignorecase = 'true',
-- 	-- 	allowcachedresults = false
-- 	-- })
-- 	-- -- mw.logObject(a)
-- 	-- mw.logObject(b)

-- 	-- local a, b = dpl.ask({
-- 	--     namespace = 'Module',
-- 	--     linksto = 'Module:Chart data',
-- 	--     distinct = 'strict',
-- 	--     ordermethod = 'title',
-- 	--     nottitlematch = '%/doc¦%sandbox%¦Exchange/%¦Exchange historical/%¦Chart data',
-- 	--     ignorecase = 'true',
-- 	-- 	allowcachedresults = false
-- 	-- },{
-- 	-- 	namespace = '',
-- 	-- 	ignorecase = 'true',
-- 	-- 	uses = 'Template:Infobox Recipe',
-- 	-- 	count = 1,
-- 	-- 	include = '{Infobox Recipe},{Infobox Item}',
-- 	-- 	allowcachedresults = false
-- 	-- })
-- 	-- mw.logObject(a)
-- 	-- mw.logObject(b)

-- 	-- local a = dpl.ask{
-- 	-- 	namespace = '',
-- 	-- 	ignorecase = 'true',
-- 	-- 	uses = 'Template:Infobox Recipe',
-- 	-- 	count = 20,
-- 	-- 	include = '{Infobox Recipe}:,{Infobox Item}:',
-- 	-- 	allowcachedresults = false
-- 	-- }
-- 	-- mw.logObject(a)

-- 	-- local Debug = require 'Module:Debug'
-- 	-- local profiler = Debug:newProfiler()
-- 	-- profiler:setHooks{{table=_G,R=true},{table=dpl,tname='dpl'}}
-- 	-- local time = os.clock()
-- 	-- local list = dpl.ask{
-- 	-- 	namespace = 'Template',
-- 	-- 	uses = 'Template:Navbox',
-- 	-- 	ordermethod = 'title',
-- 	-- 	include = '{Navbox}',
-- 	-- 	count = 10,
-- 	-- 	allowcachedresults = false
-- 	-- }
-- 	-- mw.logObject(list)
-- 	-- mw.log(os.clock()-time)
-- 	-- mw.log(list.time)
-- 	-- mw.log(profiler:report('time'))

-- 	-- mw.logObject(dpl.ask{
-- 	-- 	namespace = 'User',
-- 	-- 	ignorecase = 'true',
-- 	-- 	titlematch = 'CephHunter/Sandbox/test1',
-- 	-- 	include = '{User:CephHunter/Sandbox/test2}'
-- 	-- })

-- 	mw.log(os.clock()-time)
-- end

return dpl
-- </nowiki>