Module:Category handler: Difference between revisions
From All Skies Encyclopaedia
| imported>Mr. Stradivarius  (add blacklist check) | imported>Mr. Stradivarius   (add the rest of the features, improve the comments) | ||
| Line 1: | Line 1: | ||
| ---------------------------------------------------------------------- | |||
| -- Configuration data. | |||
| --                                                                  -- | |||
| --                         CATEGORY HANDLER                         -- | |||
| --                                                                  -- | |||
| --      This module implements the {{category handler}} template    -- | |||
| --      in Lua, with a few improvements: all namespaces and all     -- | |||
| --      namespace aliases are supported, and namespace names are    -- | |||
| --      detected automatically for the local wiki. This module      -- | |||
| --      requires [[Module:Namespace detect]] to be available on     -- | |||
| --      the local wiki. It can be configured for different wikis    -- | |||
| --      by altering the values in the "cfg" table.                  -- | |||
| --                                                                  -- | |||
| ---------------------------------------------------------------------- | |||
| ---------------------------------------------------------------------- | |||
| --                      Configuration data                          -- | |||
| --      Language-specific parameter names and values can be set     -- | |||
| --      here.                                                       -- | |||
| ---------------------------------------------------------------------- | |||
| local cfg = {} | local cfg = {} | ||
| -- cfg.nocat is the parameter name to suppress categorisation. | |||
| -- cfg.nocatTrue is the value to suppress categorisation, and  | |||
| -- cfg.nocatFalse is the value to both categorise and to skip the | |||
| -- blacklist check. | |||
| cfg.nocat = 'nocat'     | cfg.nocat = 'nocat'     | ||
| cfg.nocatTrue = 'true' | |||
| cfg.nocatFalse = 'false' | |||
| -- The parameter name for the legacy "categories" parameter.  | |||
| cfg.categories = 'categories' | cfg.categories = 'categories' | ||
| cfg. | cfg.categoriesYes = 'yes' | ||
| cfg.page = 'page' | |||
| -- The parameter name for the legacy "category2" parameter. This | |||
| -- skips the blacklist if set to the cfg.category2Yes value, and | |||
| -- suppresses categorisation if present but equal to anything other | |||
| -- than cfg.category2Yes. | |||
| cfg.category2 = 'category2' | cfg.category2 = 'category2' | ||
| cfg.category2Yes = 'yes' | |||
| -- cfg.subpage is the parameter name to specify how to behave on | |||
| -- subpages. cfg.subpageNo is the value to specify to not  | |||
| -- categorise on subpages; cfg.only is the value to specify to only | |||
| -- categorise on subpages. | |||
| cfg.subpage = 'subpage' | |||
| cfg.subpageNo = 'no' | |||
| cfg.subpageOnly = 'only' | |||
| -- The parameter for data to return in all namespaces. | |||
| cfg.all = 'all' | cfg.all = 'all' | ||
| cfg.main = 'main' | |||
| -- The parameter name for data to return if no data is specified for | |||
| -- the namespace that is detected. This must be the same as the  | |||
| -- cfg.other parameter in [[Module:Namespace detect]]. | |||
| cfg.other = 'other' | cfg.other = 'other' | ||
| -- The parameter name used to specify a page other than the current | |||
| -- page; used for testing and demonstration. This must be the same | |||
| -- as the cfg.page parameter in [[Module:Namespace detect]]. | |||
| cfg.page = 'page' | |||
| -- The categorisation blacklist. Pages that match Lua patterns in this | -- The categorisation blacklist. Pages that match Lua patterns in this | ||
| Line 34: | Line 84: | ||
| } | } | ||
| -- This is a table of namespaces to categorise by default. | |||
| -- Module start. | |||
| cfg.defaultNamespaces = { | |||
|     0, -- Main | |||
|     6, -- File | |||
|     12, -- Help | |||
|     14 -- Category | |||
| } | |||
| ---------------------------------------------------------------------- | |||
| --                     End configuration data                       -- | |||
| ---------------------------------------------------------------------- | |||
| -- Get dependent modules and declare the table of functions that we will | |||
| -- return. | |||
| local NamespaceDetect = require('Module:Namespace detect') | |||
| local p = {} | local p = {} | ||
| local args = {} | |||
| ---------------------------------------------------------------------- | |||
| -- Get the page object. This will return the page object for the page | |||
| --                         Local functions                          -- | |||
| -- specified, or nil if there are errors in the title or if the | |||
| --      The following are internal functions, which we do not want  -- | |||
| -- expensive function count has been exceeded. | |||
| --      to be accessible from other modules.                        -- | |||
| local function getPageObject() | |||
| ---------------------------------------------------------------------- | |||
|     -- Get the title object for args.page if it is specified. Otherwise | |||
|     -- get the title object for the current page. | |||
|     if args[cfg.page] then | |||
|         -- Get the page object, passing the function through pcall  | |||
|         -- in case we are over the expensive function count limit. | |||
|         local noError, pageObject = pcall(mw.title.new, args[cfg.page]) | |||
|         if not noError then | |||
|             return nil | |||
|         else | |||
|             return pageObject | |||
|         end | |||
|     else | |||
|         return mw.title.getCurrentTitle() | |||
|     end     | |||
| end | |||
| -- Find whether we need to return a category or not. | -- Find whether we need to return a category or not. | ||
| local function needsCategory( pageObject ) | local function needsCategory( pageObject, args ) | ||
|     -- If there is no pageObject available, then that either means that we are over | |||
|     -- the expensive function limit or that the title specified was invalid. Invalid | |||
|     if args[cfg.nocat] == 'true' | |||
|     -- titles will probably only be a problem during testing, so choose the best | |||
|         or ( args[cfg.category2] and args[cfg.category2] ~= 'yes' ) | |||
|     -- default for being over the expensive function limit, i.e. categorise the page. | |||
|         or ( args[cfg.subpage] == 'no' and pageObject.isSubpage ) | |||
|     if not pageObject then  | |||
|         return true | |||
|     end | |||
|     -- Only categorise if the relevant options are set. | |||
|     if args[cfg.nocat] == cfg.nocatTrue | |||
|         or ( args[cfg.category2] and args[cfg.category2] ~= cfg.category2Yes ) | |||
|         or ( args[cfg.subpage] == cfg.subpageNo and pageObject.isSubpage ) | |||
|         or ( args[cfg.subpage] == cfg.subpageOnly and not pageObject.isSubpage ) then | |||
|         return false |         return false | ||
|     else |     else | ||
| Line 72: | Line 128: | ||
| -- Find whether we need to check the blacklist or not. | -- Find whether we need to check the blacklist or not. | ||
| local function needsBlacklistCheck() | local function needsBlacklistCheck( args ) | ||
|     if args[cfg.nocat] ==  |     if args[cfg.nocat] == cfg.nocatFalse | ||
|         or args[cfg.categories] ==  |         or args[cfg.categories] == cfg.categoriesYes | ||
|         or args[cfg.category2] ==  |         or args[cfg.category2] == cfg.category2Yes then | ||
|         return false |         return false | ||
|     else |     else | ||
| Line 85: | Line 141: | ||
| -- string searched is the namespace plus the title, including subpages. | -- string searched is the namespace plus the title, including subpages. | ||
| -- Returns true if there is a match, otherwise returns false. | -- Returns true if there is a match, otherwise returns false. | ||
| local function findBlacklistMatch(pageObject) | local function findBlacklistMatch( pageObject ) | ||
|     if not pageObject then return end |     if not pageObject then return end | ||
| Line 106: | Line 162: | ||
| end | end | ||
| -- Find whether any namespace parameters have been specified. | |||
| local function _main() | |||
| -- Mappings is the table of parameter mappings taken from | |||
|     local pageObject = getPageObject() | |||
| -- [[Module:Namespace detect]]. | |||
|     if not needsCategory( pageObject ) then return end | |||
| local function nsParamsExist( mappings, args ) | |||
|     if needsBlacklistCheck() then | |||
|     if args[cfg.all] or args[cfg.other] then | |||
|         return findBlacklistMatch( pageObject ) | |||
|         return true | |||
|     end |     end | ||
|     for ns, params in pairs( mappings ) do | |||
|         for i, param in ipairs( params ) do | |||
|             if args[param] then | |||
|                 return true | |||
|             end | |||
|         end | |||
|     end | |||
|     return false | |||
| end | end | ||
| -- The main structure of the module. Checks whether we need to categorise, | |||
| -- Process the arguments. | |||
| -- and then passes the relevant arguments to [[Module:Namespace detect]]. | |||
| function p.main(frame) | |||
| local function _main( args ) | |||
|     -- Get the page object and argument mappings from | |||
|     -- [[Module:Namespace detect]], to save us from having to rewrite the | |||
|     -- code. | |||
|     local pageObject = NamespaceDetect.getPageObject() | |||
|     local mappings = NamespaceDetect.getParamMappings() | |||
|     -- Check if we need a category or not, and return nothing if not. | |||
|     if not needsCategory( pageObject, args ) then return end | |||
|     local ret = '' -- The string to return. | |||
|     if needsBlacklistCheck( args ) and not findBlacklistMatch( pageObject ) then | |||
|         if not nsParamsExist( mappings, args ) then | |||
|             -- No namespace parameters exist; basic usage. | |||
|             local ndargs = {} | |||
|             for _, nsid in ipairs( cfg.defaultNamespaces ) do | |||
|                 ndargs[ mw.ustring.lower( mw.site.namespaces[ nsid ].name ) ] = args[1] | |||
|             end | |||
|             ndargs.page = args.page | |||
|             local ndresult = NamespaceDetect.main( ndargs ) | |||
|             if ndresult then | |||
|                 ret = ret .. ndresult | |||
|             end | |||
|         else | |||
|             -- Namespace parameters exist; advanced usage. | |||
|             -- If the all parameter is specified, return it. | |||
|             if args.all then | |||
|                 ret = ret .. args.all | |||
|             end | |||
|             -- Get the arguments to pass to [[Module:Namespace detect]]. | |||
|             local ndargs = {} | |||
|             for ns, params in pairs( mappings ) do | |||
|                 for _, param in ipairs( params ) do | |||
|                     ndargs[param] = args[param] or args[cfg.other] or nil | |||
|                 end | |||
|             end | |||
|             if args.other then | |||
|                 ndargs.other = args.other | |||
|             end | |||
|             if args.page then | |||
|                 ndargs.page = args.page | |||
|             end | |||
|             local data = NamespaceDetect.main( ndargs ) | |||
|             -- Work out what to return based on the result of the namespace | |||
|             -- detect call. | |||
|             local datanum = tonumber( data ) | |||
|             if type( datanum ) == 'number' then | |||
|                 -- "data" is a number, so return that positional parameter. | |||
|                 -- Remove non-positive integer values, as only positive integers | |||
|                 -- from 1-10 were used with the old template. | |||
|                 if datanum > 0  | |||
|                     and math.floor( datanum ) == datanum | |||
|                     and args[datanum] then | |||
|                     ret = ret .. args[ datanum ] | |||
|                 end | |||
|             else | |||
|                 -- "data" is not a number, so return it as it is. | |||
|                 if type(data) == 'string' then | |||
|                     ret = ret .. data | |||
|                 end | |||
|             end | |||
|         end | |||
|     end | |||
|     return ret | |||
| end | |||
| ---------------------------------------------------------------------- | |||
| --                        Global functions                          -- | |||
| --      The following functions are global, because we want them    -- | |||
| --      to be accessible from #invoke and from other Lua modules.   -- | |||
| --      At the moment only the main function is here. It processes  -- | |||
| --      the arguments and passes them to the _main function.         -- | |||
| ---------------------------------------------------------------------- | |||
| function p.main( frame ) | |||
|     -- If called via #invoke, use the args passed into the invoking |     -- If called via #invoke, use the args passed into the invoking | ||
|     -- template, or the args passed to #invoke if any exist. Otherwise |     -- template, or the args passed to #invoke if any exist. Otherwise | ||
| Line 129: | Line 271: | ||
|         origArgs = frame |         origArgs = frame | ||
|     end |     end | ||
|     -- The following don't need blank values preserved: | |||
|     -- nocat | |||
|     -- categories | |||
|     -- subpage | |||
|     -- page | |||
|     -- positional parameters (1-10) | |||
|     -- The following *do* need blank values preserved | |||
|     -- category2 | |||
|     -- all | |||
|     -- other | |||
|     -- main | |||
|     -- all the namespace parameters | |||
|     -- Trim whitespace and remove blank arguments for the following args: |     -- Trim whitespace and remove blank arguments for the following args: | ||
|     -- 1, 2, 3 etc., "nocat", "categories", "subpage", and "page". |     -- 1, 2, 3 etc., "nocat", "categories", "subpage", and "page". | ||
|     local args = {} | |||
|     for k, v in pairs(origArgs) do | |||
|     for k, v in pairs( origArgs ) do | |||
|         v = mw.text.trim(v) -- Trim whitespace. |         v = mw.text.trim(v) -- Trim whitespace. | ||
|         if type(k) == 'number' |         if type(k) == 'number' | ||
| Line 161: | Line 290: | ||
|     end |     end | ||
|     -- Lower-case "nocat", "categories", "category2", and "subpage". |     -- Lower-case "nocat", "categories", "category2", and "subpage". These | ||
|     -- parameters are put in lower case whenever they appear in the old | |||
|     -- template, so we can just do it once here and save ourselves some work. | |||
|     local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage } |     local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage } | ||
|     for _, v in ipairs( lowercase ) do |     for _, v in ipairs( lowercase ) do | ||
| Line 169: | Line 300: | ||
|     end |     end | ||
|     return _main() |     return _main( args ) | ||
| end | end | ||
Revision as of 09:36, 1 July 2013
Documentation for this module may be created at Module:Category handler/doc
----------------------------------------------------------------------
--                                                                  --
--                         CATEGORY HANDLER                         --
--                                                                  --
--      This module implements the {{category handler}} template    --
--      in Lua, with a few improvements: all namespaces and all     --
--      namespace aliases are supported, and namespace names are    --
--      detected automatically for the local wiki. This module      --
--      requires [[Module:Namespace detect]] to be available on     --
--      the local wiki. It can be configured for different wikis    --
--      by altering the values in the "cfg" table.                  --
--                                                                  --
----------------------------------------------------------------------
----------------------------------------------------------------------
--                      Configuration data                          --
--      Language-specific parameter names and values can be set     --
--      here.                                                       --
----------------------------------------------------------------------
-- Every wiki-configurable parameter name and parameter value lives in
-- this table, so that the module can be localised in one place.
local cfg = {
    -- nocat is the name of the parameter that controls suppression of
    -- categorisation. Setting it to the nocatTrue value suppresses
    -- categorisation; setting it to the nocatFalse value categorises
    -- the page and skips the blacklist check.
    nocat = 'nocat',
    nocatTrue = 'true',
    nocatFalse = 'false',

    -- The name of the legacy "categories" parameter. Setting it to the
    -- categoriesYes value skips the blacklist check.
    categories = 'categories',
    categoriesYes = 'yes',

    -- The name of the legacy "category2" parameter. Setting it to the
    -- category2Yes value skips the blacklist check; if it is present
    -- with any other value, categorisation is suppressed.
    category2 = 'category2',
    category2Yes = 'yes',

    -- subpage is the name of the parameter that controls behaviour on
    -- subpages. The subpageNo value means "do not categorise on
    -- subpages"; the subpageOnly value means "only categorise on
    -- subpages".
    subpage = 'subpage',
    subpageNo = 'no',
    subpageOnly = 'only',

    -- The name of the parameter whose data is returned in all
    -- namespaces.
    all = 'all',

    -- The name of the parameter whose data is returned when nothing is
    -- specified for the detected namespace. This must be the same as
    -- the cfg.other parameter in [[Module:Namespace detect]].
    other = 'other',

    -- The name of the parameter used to specify a page other than the
    -- current page; used for testing and demonstration. This must be
    -- the same as the cfg.page parameter in [[Module:Namespace detect]].
    page = 'page',
}
-- The categorisation blacklist. Each entry is a Lua pattern that is
-- matched against the page's namespace text, a colon, and the page text
-- (just the page text when the namespace text is blank). Pages that
-- match any pattern in this list will not be categorised unless the
-- appropriate options are set.
-- If the namespace name has a space in, it must be written with an
-- underscore, e.g. "Wikipedia_talk". Other parts of the title can have
-- either underscores or spaces.
-- NOTE(review): the underscore rule above depends on how the title
-- object's nsText field renders namespace names - confirm that it yields
-- "User_talk" rather than "User talk" on the local wiki.
cfg.blacklist = {
    '^Main Page$', -- don't categorise the main page.
    
    -- Don't categorise the following pages or their subpages.
    '^Wikipedia:Cascade%-protected items$',
    '^Wikipedia:Cascade%-protected items/.*$',
    '^User:UBX$', -- The userbox "template" space.
    '^User:UBX/.*$',
    '^User_talk:UBX$',
    '^User_talk:UBX/.*$',
    
    -- Don't categorise subpages of these pages, but allow
    -- categorisation of the base page.
    '^Wikipedia:Template messages/.*$',
    
    -- Unanchored, so this matches "/archive" or "/Archive" anywhere in
    -- the title.
    '/[aA]rchive' -- Don't categorise archives.
}
-- Namespace numbers that are categorised by default, i.e. when the
-- caller supplies no namespace parameters: Main (0), File (6),
-- Help (12) and Category (14).
cfg.defaultNamespaces = { 0, 6, 12, 14 }
----------------------------------------------------------------------
--                     End configuration data                       --
----------------------------------------------------------------------
-- Get dependent modules and declare the table of functions that we will
-- return.
local NamespaceDetect = require('Module:Namespace detect')
local p = {}
----------------------------------------------------------------------
--                         Local functions                          --
--      The following are internal functions, which we do not want  --
--      to be accessible from other modules.                        --
----------------------------------------------------------------------
-- Decide whether the page should be categorised at all.
-- pageObject is the title object of the page being checked (may be nil),
-- and args is the table of processed arguments. Returns true if a
-- category should be returned and false if it should be suppressed.
local function needsCategory( pageObject, args )
    -- No pageObject means either that the expensive function limit has
    -- been exceeded or that the specified title was invalid. Invalid
    -- titles should only really turn up during testing, so pick the
    -- sensible default for the over-the-limit case: categorise.
    if not pageObject then
        return true
    end

    -- The nocat parameter explicitly suppresses categorisation.
    if args[cfg.nocat] == cfg.nocatTrue then
        return false
    end

    -- The legacy category2 parameter suppresses categorisation when it
    -- is present with any value other than the "yes" value.
    local category2 = args[cfg.category2]
    if category2 and category2 ~= cfg.category2Yes then
        return false
    end

    -- The subpage parameter can exclude subpages, or exclude everything
    -- except subpages.
    local subpage = args[cfg.subpage]
    if subpage == cfg.subpageNo and pageObject.isSubpage then
        return false
    end
    if subpage == cfg.subpageOnly and not pageObject.isSubpage then
        return false
    end

    return true
end
-- Decide whether the blacklist has to be consulted.
-- args is the table of processed arguments. Returns false when any of
-- the nocat, categories or category2 parameters is set to its
-- blacklist-skipping value, and true otherwise.
local function needsBlacklistCheck( args )
    local skipBlacklist = args[cfg.nocat] == cfg.nocatFalse
        or args[cfg.categories] == cfg.categoriesYes
        or args[cfg.category2] == cfg.category2Yes
    return not skipBlacklist
end
-- Searches the blacklist to find a match with the page object. The
-- string searched is the namespace plus the title, including subpages.
-- pageObject is the title object to check (may be nil).
-- Returns true if there is a match, otherwise returns false. A missing
-- pageObject counts as "no match" and also returns false, so that the
-- function always returns a boolean.
local function findBlacklistMatch( pageObject )
    if not pageObject then
        return false
    end

    -- Build the string to check: "Namespace:Title", or just "Title"
    -- when the namespace text is the blank string.
    local title = pageObject.text
    local nsText = pageObject.nsText
    if #nsText > 0 then
        title = nsText .. ':' .. title
    end

    -- Check the blacklist; the first matching pattern wins.
    for _, pattern in ipairs( cfg.blacklist ) do
        if mw.ustring.match( title, pattern ) then
            return true
        end
    end
    return false
end
-- Report whether the caller supplied any namespace parameters.
-- mappings is the table of parameter mappings taken from
-- [[Module:Namespace detect]]; each of its values is iterated as an
-- array of parameter names. args is the table of processed arguments.
-- Returns true if the "all" or "other" parameter, or any parameter named
-- in mappings, is present in args; otherwise returns false.
local function nsParamsExist( mappings, args )
    local hasParam = ( args[cfg.all] or args[cfg.other] ) and true or false
    if not hasParam then
        for _, paramNames in pairs( mappings ) do
            for _, paramName in ipairs( paramNames ) do
                if args[paramName] then
                    return true
                end
            end
        end
    end
    return hasParam
end
-- The main structure of the module. Checks whether we need to categorise,
-- and then passes the relevant arguments to [[Module:Namespace detect]].
-- args is the table of processed arguments built by p.main.
-- Returns nil when categorisation is suppressed outright; otherwise
-- returns the string to output, which is the blank string if the page is
-- blacklisted or if no data applies to the detected namespace.
local function _main( args )
    -- Get the page object and argument mappings from
    -- [[Module:Namespace detect]], to save us from having to rewrite the
    -- code. The page parameter is passed through so that the category and
    -- blacklist checks look at the page being tested rather than always
    -- at the current page.
    -- NOTE(review): assumes getPageObject accepts the page name as its
    -- first argument - confirm against [[Module:Namespace detect]]. If it
    -- takes no arguments the extra value is simply ignored.
    local pageObject = NamespaceDetect.getPageObject( args[cfg.page] )
    local mappings = NamespaceDetect.getParamMappings()

    -- Check if we need a category or not, and return nothing if not.
    if not needsCategory( pageObject, args ) then return end

    local ret = '' -- The string to return.

    -- Categorise if the blacklist check is being skipped, or if the check
    -- is needed and the page is not on the blacklist. (Requiring the
    -- check to be needed would return nothing for exactly the options
    -- that are meant to bypass the blacklist.)
    if not needsBlacklistCheck( args ) or not findBlacklistMatch( pageObject ) then
        if not nsParamsExist( mappings, args ) then
            -- No namespace parameters exist; basic usage. Return the first
            -- positional parameter in each of the default namespaces.
            -- NOTE(review): the key used here is the lower-cased local
            -- namespace name; confirm that this matches the parameter
            -- name [[Module:Namespace detect]] expects for every default
            -- namespace, in particular the main namespace.
            local ndargs = {}
            for _, nsid in ipairs( cfg.defaultNamespaces ) do
                ndargs[ mw.ustring.lower( mw.site.namespaces[ nsid ].name ) ] = args[1]
            end
            ndargs[cfg.page] = args[cfg.page]
            local ndresult = NamespaceDetect.main( ndargs )
            if ndresult then
                ret = ret .. ndresult
            end
        else
            -- Namespace parameters exist; advanced usage.
            -- If the all parameter is specified, return it.
            if args[cfg.all] then
                ret = ret .. args[cfg.all]
            end

            -- Get the arguments to pass to [[Module:Namespace detect]].
            -- Each namespace parameter falls back to the "other" value
            -- when it has not been specified itself.
            local ndargs = {}
            for _, params in pairs( mappings ) do
                for _, param in ipairs( params ) do
                    ndargs[param] = args[param] or args[cfg.other]
                end
            end
            if args[cfg.other] then
                ndargs[cfg.other] = args[cfg.other]
            end
            if args[cfg.page] then
                ndargs[cfg.page] = args[cfg.page]
            end
            local data = NamespaceDetect.main( ndargs )

            -- Work out what to return based on the result of the namespace
            -- detect call.
            local datanum = tonumber( data )
            if type( datanum ) == 'number' then
                -- "data" is a number, so return that positional parameter.
                -- Remove non-positive integer values, as only positive integers
                -- from 1-10 were used with the old template.
                if datanum > 0
                    and math.floor( datanum ) == datanum
                    and args[datanum] then
                    ret = ret .. args[ datanum ]
                end
            elseif type( data ) == 'string' then
                -- "data" is not a number, so return it as it is.
                ret = ret .. data
            end
        end
    end
    return ret
end
----------------------------------------------------------------------
--                        Global functions                          --
--      The following functions are global, because we want them    --
--      to be accessible from #invoke and from other Lua modules.   --
--      At the moment only the main function is here. It processes  --
--      the arguments and passes them to the _main function.         --
----------------------------------------------------------------------
-- Entry point for #invoke and for other Lua modules.
-- frame is either the current frame object (when called via #invoke) or
-- a plain table of arguments (when called from another Lua module).
-- Normalises the arguments and returns the result of _main.
function p.main( frame )
    -- If called via #invoke, use the args passed into the invoking
    -- template, or the args passed to #invoke if any exist. Otherwise
    -- assume args are being passed directly in.
    local origArgs
    if frame == mw.getCurrentFrame() then
        origArgs = frame:getParent().args
        -- A one-step pairs() loop is used to detect whether frame.args
        -- has any entries, because frame.args is a proxy table on which
        -- next() and the # operator do not work.
        for _ in pairs( frame.args ) do
            origArgs = frame.args
            break
        end
    else
        origArgs = frame
    end

    -- Trim whitespace from every string argument, and remove blank
    -- values for the following args: 1, 2, 3 etc., "nocat",
    -- "categories", "subpage", and "page". All other args keep blank
    -- values, as a blank value is meaningful for them. Non-string values
    -- (possible when called from another Lua module) are passed through
    -- untouched rather than being handed to the string functions.
    local args = {}
    for k, v in pairs( origArgs ) do
        if type( v ) == 'string' then
            v = mw.text.trim( v )
        end
        local dropIfBlank = type( k ) == 'number'
            or k == cfg.nocat
            or k == cfg.categories
            or k == cfg.subpage
            or k == cfg.page
        if not ( dropIfBlank and v == '' ) then
            args[k] = v
        end
    end

    -- Lower-case "nocat", "categories", "category2", and "subpage". These
    -- parameters are put in lower case whenever they appear in the old
    -- template, so we can just do it once here and save ourselves some work.
    local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage }
    for _, name in ipairs( lowercase ) do
        local value = args[name]
        if type( value ) == 'string' then
            args[name] = mw.ustring.lower( value )
        end
    end

    return _main( args )
end
return p







