Module:Footnotes/anchor id list: Difference between revisions
From All Skies Encyclopaedia
imported>Trappist the monk No edit summary |
imported>Hike395 (fix errors when using Visual Editor) |
||
(23 intermediate revisions by 3 users not shown) | |||
Line 1: | Line 1: | ||
require(' |
require('strict'); |
||
local data = mw.loadData ('Module:Footnotes/anchor id list/data'); |
|||
local whitelist = mw.loadData ('Module:Footnotes/whitelist'); |
|||
local Lang_obj = mw.language.getContentLanguage(); -- used by template_list_add() to uppercase first letter of template name TODO: better way to do that? |
|||
local anchor_id_list = {}; |
|||
local redirect_patterns_anchor = { |
|||
'{{%s*[Aa]nchor', |
|||
'{{%s*[Aa]nchor for redirect', |
|||
'{{%s*[Aa]nchors', |
|||
'{{%s*[Aa]nchro', |
|||
'{{%s*[Aa]ncor', |
|||
} |
|||
local redirect_patterns_harvc = { |
|||
'{{%s*[Hh]arvc', |
|||
'{{%s*[Cc]itec', |
|||
} |
|||
local redirect_patterns_sfn_whitelist = { |
|||
'{{%s*[Ss]fn whitelist', |
|||
'{{%s*[Hh]arv whitelist', |
|||
} |
|||
local redirect_patterns_vcite = { |
|||
'{{%s*[Vv]cite', |
|||
'{{%s*[Vv]ancite', |
|||
-- '{{%s*[Cc]it ', -- disabled 'cit journal & cit paper' redirect to vcite journal but 'cit book', 'cit new', 'cit web' are cs1 redirects |
|||
} |
|||
local redirects_citation = { |
|||
['citation'] = true, |
|||
['cite'] = true, |
|||
['cite citation'] = true, |
|||
['cite study'] = true, |
|||
['cite technical standard'] = true, |
|||
} |
|||
local redirects_date = { |
local redirects_date = { |
||
['date'] = true, |
['date'] = true, |
||
Line 37: | Line 11: | ||
['isotomos'] = true, |
['isotomos'] = true, |
||
} |
} |
||
local redirects_patent = { -- special case cs1-like templates because uses different parameters for name and date in anchor ID |
|||
local redirects_harvc = { |
|||
[' |
['Cite patent'] = true, |
||
[' |
['Citeref patent'] = true, |
||
['Ref patent'] = true, |
|||
} |
|||
local redirects_patent = { |
|||
['cite patent'] = true, |
|||
['citeref patent'] = true, |
|||
['ref patent'] = true, |
|||
} |
} |
||
local redirects_sfnref = { |
local redirects_sfnref = { |
||
Line 90: | Line 60: | ||
'|%s*date%s*=%s*', |
'|%s*date%s*=%s*', |
||
'|%s*publication%-?date%s*=%s*', |
'|%s*publication%-?date%s*=%s*', |
||
'|%s*air%-?date%s*=%s*', |
|||
} |
} |
||
local alias_patterns_harvc_date = { -- normal lua patterns for harvc template |
local alias_patterns_harvc_date = { -- normal lua patterns for harvc template |
||
Line 102: | Line 73: | ||
} |
} |
||
local patterns_date = { -- normal lua patterns |
local patterns_date = { -- normal lua patterns |
||
-- '(%d%d%d%d–%d%d%d%d%l?)$', -- YYYY–YYYY four-digit year range at end (Season YYYY–YYYY); with or without dab |
|||
'(%d%d%d%d)%D+(%d%d%d%d%l?)$', -- any range with four-digit years; with or without dab; not two captures |
|||
'^(%d%d%d%d–%d%d%l?)$', -- YYYY–YY two-digit year range; with or without dab |
'^(%d%d%d%d–%d%d%l?)$', -- YYYY–YY two-digit year range; with or without dab |
||
'^(c%. %d%d%d%d?%l?)$', -- three- or four-digit circa year; with or without dab |
'^(c%. %d%d%d%d?%l?)$', -- three- or four-digit circa year; with or without dab |
||
Line 117: | Line 89: | ||
'<source.->.-</source>', -- deprecated alias of syntaxhighlight tag |
'<source.->.-</source>', -- deprecated alias of syntaxhighlight tag |
||
} |
} |
||
local template_skip = { -- templates to be skipped for whatever reason; mostly because they resemble cs1-like templates |
|||
local template_skip = { |
|||
[' |
['Citation-attribution'] = true, |
||
} |
} |
||
local global_article_content = nil |
|||
local Article_content; |
|||
local global_anchor_id_list = nil -- exported tables |
|||
local global_template_list = nil |
|||
local global_article_whitelist = nil |
|||
--[[--------------------------< A R T I C L E _ C O N T E N T _ G E T >---------------------------------------- |
--[[--------------------------< A R T I C L E _ C O N T E N T _ G E T >---------------------------------------- |
||
Line 130: | Line 108: | ||
local function article_content_get () |
local function article_content_get () |
||
if global_article_content then return global_article_content end |
|||
if not Article_content then |
|||
local article_content = mw.title.getCurrentTitle():getContent() or ''; -- get the content of the article or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625 |
|||
for _, tag in ipairs (patterns_tags) do |
|||
article_content = article_content:gsub (tag, ''); -- remove certain html-like tags and their content |
|||
end |
|||
end |
end |
||
global_article_content = article_content |
|||
return article_content |
|||
end |
end |
||
Line 161: | Line 140: | ||
while parts[i] and 7 > j do -- loop through what should be just positional parameters for names and year (2-6 four names and a date) |
while parts[i] and 7 > j do -- loop through what should be just positional parameters for names and year (2-6 four names and a date) |
||
if not parts[i]:find ('=') then -- look for equal sign (named paraneter in a template that doesn't support named parameters) |
if not parts[i]:find ('=') then -- look for equal sign (named paraneter in a template that doesn't support named parameters) |
||
anchor_id[j] = parts[i]; |
anchor_id[j] = parts[i]; -- positional parameters are saved |
||
j = j+1; -- bump the anchor_id{} indexer |
j = j+1; -- bump the anchor_id{} indexer |
||
end |
end |
||
Line 181: | Line 160: | ||
local function date_get (template, aliases) |
local function date_get (template, aliases) |
||
local date; |
|||
local rvalue; |
local rvalue; |
||
Line 210: | Line 188: | ||
if rvalue then |
if rvalue then |
||
for _, pattern in ipairs (patterns_date) do -- spin through the recognized date formats |
for _, pattern in ipairs (patterns_date) do -- spin through the recognized date formats |
||
date = rvalue:match (pattern); -- attempt to extract year portion according to the pattern |
-- date = rvalue:match (pattern); -- attempt to extract year portion according to the pattern |
||
local date, date2 = rvalue:match (pattern); -- attempt to extract year portion according to the pattern; <date2> gets second year in any range |
|||
if date then |
if date then |
||
if date2 then -- when a second year |
|||
date = table.concat ({date, '–', date2}); -- build a date range |
|||
end |
|||
return date; -- matched so return; |
return date; -- matched so return; |
||
end |
end |
||
Line 349: | Line 331: | ||
local function template_strip (template) |
local function template_strip (template) |
||
template = template:gsub ('^{{', ''):gsub ('}}$', '', 1); |
template = template:gsub ('^{{%s*', ''):gsub ('%s*}}$', '', 1); -- remove outer {{ and }} (cs1|2 template delimiters with trailing/leading whitespace) |
||
template = template:gsub ('%b{}', ''); -- remove any templates from the cs1|2 template |
template = template:gsub ('%b{}', ''); -- remove any templates from the cs1|2 template |
||
return template; |
return template; |
||
Line 403: | Line 385: | ||
local function template_name_get (template) |
local function template_name_get (template) |
||
local template_name = template:match ('{{%s*([^/|]+)'); |
local template_name = template:match ('^{{%s*([^/|}]+)'); -- get template name; ignore subpages ~/new, ~/sandbox; parser functions |
||
if not template_name then |
|||
if not template_name or template_name:match ('^#') then -- parser functions, magic words don't count as templates |
|||
return nil; -- could not get template name from (possibly corrupt) template; extraneous opening { mid template can cause this; |
return nil; -- could not get template name from (possibly corrupt) template; extraneous opening { mid template can cause this; |
||
end; |
end; |
||
template_name = template_name:gsub ('%s*$', ''); -- trim whitespace |
template_name = template_name:gsub ('%s*$', ''); -- trim trailing whitespace; leading whitespace already removed |
||
return Lang_obj:ucfirst (template_name); -- first character in template name must be uppercase (same as canonical template name) TODO: better way to do this? |
|||
template_name = template_name:lower(); -- and lowercase only |
|||
return template_name; |
|||
end |
end |
||
Line 420: | Line 402: | ||
]] |
]] |
||
local function template_params_get (template, |
local function template_params_get (template, params_t) |
||
template = wikilink_strip (template); -- because piped wikilinks confuse code that builds |
template = wikilink_strip (template); -- because piped wikilinks confuse code that builds params_t{} and because wikilinks not allowed in an anchor id |
||
-- strip templates after getting |ref= value because |ref={{sfnref}} and |ref={{harvid}} are allowed |
-- strip templates after getting |ref= value because |ref={{sfnref}} and |ref={{harvid}} are allowed |
||
template = template_strip (template); -- because template markup can confuse code that builds |
template = template_strip (template); -- because template markup can confuse code that builds params_t{} and because templates in name parameters are not allowed |
||
local temp_t = mw.text.split (template, '%s*|%s*'); --split on the pipe |
|||
for _, param in ipairs (temp_t) do |
|||
if param:find ('=', 1, true) then -- a named parameter? |
|||
for param, value in template:gmatch ('|%s*([^=]-)%s*=%s*([^|}]+)') do -- build a table of template parameters and their values |
|||
local k, v = param:match ('%s*([^=]-)%s*=%s*([^|}]+)'); |
|||
if value then -- there must be a value but when |
|||
if v then -- there must be a value |
|||
if '' ~= value and not value:match ('^%s$') then -- skip when value is empty string or only whitespace |
|||
if '' ~= v and not v:match ('^%s$') then -- skip when value is empty string or only whitespace |
|||
params_t[k] = mw.text.trim (v); -- add trimmed value else |
|||
end |
|||
end |
end |
||
end |
end |
||
Line 444: | Line 428: | ||
local function anchor_id_make_harvc (template) |
local function anchor_id_make_harvc (template) |
||
local date = date_get (template, alias_patterns_harvc_date); |
local date = date_get (template, alias_patterns_harvc_date); -- get date; done here because might be in {{date}}; return date if valid; empty string else |
||
local anchor_id; |
local anchor_id; |
||
local params = {}; -- table of harvc parameters |
local params = {}; -- table of harvc parameters |
||
Line 460: | Line 444: | ||
end |
end |
||
anchor_id = names_get (params, aliases_harvc_author); |
anchor_id = names_get (params, aliases_harvc_author); -- get the harvc contributor names |
||
if anchor_id then |
if anchor_id then -- if names were gotten |
||
return 'CITEREF' .. anchor_id .. date; |
return 'CITEREF' .. anchor_id .. date; |
||
end |
end |
||
Line 469: | Line 453: | ||
--[[--------------------------< |
--[[--------------------------< A N C H O R _ I D _ M A K E _ W R A P P E R >---------------------------------- |
||
for wrapper templates |
|||
inspect externally visible |ref= to decide what to do: |
|||
|ref= - empty or missing: get names and date from whitelist defaults; override defaults from externally visible template parameters |
|||
|ref=harv - same as empty or missing |
|||
|ref={{SfnRef|name|name|name|name|year}} - assemble an anchor id from {{sfnref}} positional parameters |
|||
|ref={{Harvid|name|name|name|name|year}} - assemble an anchor id from {{harvid}} positional parameters |
|||
|ref=none - skip; do nothing because an anchor id intentionally suppressed; TODO: keep with a type code of '0'? |
|||
|ref=<text> - save param value because may match an anchor id override value in {{harv}} template |ref= parameter or {{harvc}} |id= parameter |
|||
]] |
|||
local function anchor_id_make_wrapper (template) |
|||
local ref; -- content of |ref= |
|||
local template_name; -- name of the template |
|||
local anchor_id; -- the assembled anchor id from this template |
|||
local date; |
|||
local name_default; |
|||
local date_default; |
|||
local vol; |
|||
local params = {}; -- table of template parameters |
|||
template_name = template_name_get (template); -- get first char uppercase trimmed template name; ignore subpages ~/new, ~/sandbox |
|||
if not template_name or template_skip[template_name] then |
|||
return nil; -- could not extract template name from (possibly corrupted) template (extraneous opening { in the template will cause this) |
|||
end |
|||
date = date_get (template, alias_patterns_date); -- get date; done here because might be in {{date}} |
|||
-- if '' == date then |
|||
-- date = whitelist.wrapper_templates[template_name][2] or ''; -- no externally visible date so get default date |
|||
-- end |
|||
ref = template:match ('|%s*ref%s*=%s*(%b{})'); -- first look for |ref={{sfnref}} or |ref={{harvid}} because we will strip templates from the wrapper template |
|||
if not ref then |
|||
if template:match ('|%s*ref%s*=([^|}]+)') then -- |ref={{template}} not found; if there is a |ref= param with an assigned value |
|||
ref = template:match ('|%s*ref%s*=([^|}]+)'); -- get the value; whitespace is a 'value' |
|||
if ref then -- nil when |ref=|... or when |ref=}} (no spaces between assignment operator and pipe or closing brace) |
|||
ref = mw.text.trim (ref); -- something, could be just whitespace, so trim leading / trailing whitespace |
|||
if '' == ref then -- trimming a string of whitespace makes an empty string |
|||
ref = nil; -- make empty ref same as missing ref |
|||
end |
|||
end |
|||
end |
|||
end |
|||
template_params_get (template, params); -- build a table of template parameters and their values |
|||
local wrap_data = whitelist.wrapper_templates[template_name] |
|||
if wrap_data[1] then -- is this wrapper a simple-default wrapper? |
|||
name_default = wrap_data[1]; -- get the default names |
|||
date_default = wrap_data[2]; -- get the default date |
|||
else |
|||
vol = params['volume'] or 'default'; |
|||
local fascicle = params['fascicle'] -- some templates use "fascicle" to mean "subvolume" |
|||
if fascicle then |
|||
local subvol = vol..'/'..fascicle -- if fascicle is used, subvolume = "vol/fascicle" |
|||
if wrap_data[subvol] then -- if subvolume exists, use it, otherwise fall back to volume |
|||
vol = subvol |
|||
end |
|||
end |
|||
if not wrap_data[vol] then -- make sure this volume exists |
|||
vol = 'default'; -- doesn't exist, use default volume |
|||
end |
|||
name_default = wrap_data[vol][1]; -- get the default names |
|||
date_default = wrap_data[vol][2]; -- get the default date |
|||
end |
|||
if 'harv' == ref or not ref then -- |ref=harv specified or |ref= missing or empty |
|||
anchor_id = names_get (params, aliases_contributor) or -- get contributor, author, or editor names |
|||
names_get (params, aliases_author) or |
|||
vnames_get (params, 'vauthors') or -- |vauthors= |
|||
names_get (params, aliases_editor) or |
|||
vnames_get (params, 'veditors') or -- |veditors= |
|||
name_default; -- default names from whitelist |
|||
-- whitelist.wrapper_templates[template_name][1]; -- default names from whitelist |
|||
if '' == date then -- if date not provided in the template |
|||
date = date_default; -- use the default date from whitelist |
|||
end |
|||
if anchor_id then -- if names were gotten |
|||
anchor_id = 'CITEREF' .. anchor_id .. date; |
|||
end |
|||
elseif ref:match ('%b{}') then -- ref holds a template |
|||
anchor_id = sfnref_get (ref); -- returns content of {{sfnref}} or {{harvid}}; nil else |
|||
elseif 'none' == ref then -- |ref=none |
|||
return nil; -- anchor id expicitly suppressed |
|||
else |
|||
anchor_id = ref; -- |ref=<text> may match an anchor id override value in {{harv}} template |ref= parameter |
|||
end |
|||
return anchor_id; -- anchor_id text; nil else |
|||
end |
|||
--[[--------------------------< A N C H O R _ I D _ M A K E _ C S 1 2 >---------------------------------------- |
|||
for cs1|2 template and cs1-like templates |
|||
inspect |ref= to decide what to do: |
inspect |ref= to decide what to do: |
||
|ref= - empty or missing: get names and date from template parameters |
|ref= - empty or missing: get names and date from template parameters; all cs1|2 create CITEREF anchor IDs |
||
|ref=harv - get names and date from template parameters |
|ref=harv - get names and date from template parameters |
||
|ref={{SfnRef|name|name|name|name|year}} - assemble an anchor id from {{sfnref}} positional parameters |
|ref={{SfnRef|name|name|name|name|year}} - assemble an anchor id from {{sfnref}} positional parameters |
||
Line 478: | Line 565: | ||
|ref=none - skip; do nothing because an anchor id intentionally suppressed; TODO: keep with a type code of '0'? |
|ref=none - skip; do nothing because an anchor id intentionally suppressed; TODO: keep with a type code of '0'? |
||
|ref=<text> - save param value because may match an anchor id override value in {{harv}} template |ref= parameter or {{harvc}} |id= parameter |
|ref=<text> - save param value because may match an anchor id override value in {{harv}} template |ref= parameter or {{harvc}} |id= parameter |
||
this no longer applies; all cs1|2 will soon create CITEREF anchor IDs |
|||
|ref= - empty or missing |
|||
for cs1: skip |
|||
if |mode=cs2: spoof |ref=harv |
|||
for cs2: get names and date from template parameters |
|||
if |mode=cs1: skip |
|||
]] |
]] |
||
local function |
local function anchor_id_make_cs12 (template) |
||
local ref; -- content of |ref= |
local ref; -- content of |ref= |
||
local template_name; -- name of the template |
local template_name; -- name of the template |
||
local anchor_id; -- the assembled anchor id from this template |
local anchor_id; -- the assembled anchor id from this template |
||
local date; |
local date; |
||
local params = {}; -- table of |
local params = {}; -- table of template parameters |
||
template_name = template_name_get (template); -- get |
template_name = template_name_get (template); -- get first char uppercase trimmed template name; ignore subpages ~/new, ~/sandbox |
||
if not template_name or template_skip[template_name] then |
if not template_name or template_skip[template_name] then |
||
return nil; -- could not extract template name from (possibly corrupted) template (extraneous opening { in the template will cause this) |
return nil; -- could not extract template name from (possibly corrupted) template (extraneous opening { in the template will cause this) |
||
Line 501: | Line 581: | ||
if redirects_patent[template_name] then |
if redirects_patent[template_name] then |
||
date = date_get (template, alias_patterns_patent_date); |
date = date_get (template, alias_patterns_patent_date); -- get date; done here because might be in {{date}} |
||
else |
else |
||
date = date_get (template, alias_patterns_date); |
date = date_get (template, alias_patterns_date); |
||
Line 517: | Line 597: | ||
end |
end |
||
end |
end |
||
-- this disabled because all cs1|2 templates will create CITEREF anchor IDs after next cs1|2 module-suite update |
|||
-- if not ref then -- here when |ref= missing or empty |
|||
-- if redirects_citation[template_name] then -- could be cs2 |
|||
-- if template:match ('|%s*mode%s*=%s*cs1') then |
|||
-- return nil; -- |ref= missing or empty; citation template but |mode=cs1 |
|||
-- else |
|||
-- ref = 'harv'; -- spoof to handle cs2 as if it were cs1 with |ref=harv |
|||
-- end |
|||
-- else -- |ref= missing or empty; not a cs2 template |
|||
-- if template:match ('|%s*mode%s*=%s*cs2') then |
|||
-- ref = 'harv'; -- |ref= missing or empty; not a cs2 template; |mode=cs2; spoof as if it were cs1 with |ref=harv |
|||
-- end |
|||
-- end |
|||
-- end |
|||
end |
end |
||
template_params_get (template, params); -- build a table of template parameters and their values |
template_params_get (template, params); -- build a table of template parameters and their values |
||
if not ref then |
if 'harv' == ref or not ref then -- |ref=harv specified or |ref= missing or empty |
||
if 'cite lsa' == template_name then |
|||
return 'CITEREF' .. (params.last or '') .. (params.year or ''); -- cite LSA always creates an anchor id using only |last= and |year= (no aliases) |
|||
end |
|||
-- all cs1|2 templates will create CITEREF anchor IDs after next cs1|2 module-suite update so keep going |
|||
-- return nil; -- not cite LSA so done |
|||
end |
|||
if 'harv' == ref or not ref then -- |ref=harv specified or |ref= missing or empty (new cs1|2 default is not default for other templates handled here) |
|||
if redirects_patent[template_name] then -- if this is a cite patent template |
if redirects_patent[template_name] then -- if this is a cite patent template |
||
anchor_id = names_get (params, aliases_inventor); -- inventor names only |
anchor_id = names_get (params, aliases_inventor); -- inventor names only |
||
Line 565: | Line 622: | ||
return nil; -- anchor id expicitly suppressed |
return nil; -- anchor id expicitly suppressed |
||
-- elseif '' ~= ref then -- ref is never empty string here -- |ref=<text> |
|||
else |
else |
||
anchor_id = ref; -- |ref=<text> may match an anchor id override value in {{harv}} template |ref= parameter |
anchor_id = ref; -- |ref=<text> may match an anchor id override value in {{harv}} template |ref= parameter |
||
Line 576: | Line 632: | ||
--[[--------------------------< L I S T _ A D D >-------------------------------------------------------------- |
--[[--------------------------< L I S T _ A D D >-------------------------------------------------------------- |
||
adds an item to |
adds an <item> to <list> table; for anchor IDs, the boolean <encode> argument must be set true; no return value |
||
]] |
]] |
||
local function list_add (item, list, encode) |
local function list_add (item, list, encode) |
||
if item then -- if there was an |
if item then -- if there was an item |
||
if encode then -- for anchor IDs ... |
if encode then -- for anchor IDs ... |
||
item = mw.uri.anchorEncode (item); -- encode to remove wikimarkup, convert spaces to underscores etc |
item = mw.uri.anchorEncode (item); -- encode to remove wikimarkup, convert spaces to underscores etc |
||
end |
end |
||
if not list[item] then -- if not already saved |
if not list[item] then -- if not already saved |
||
list[item] = 1; -- save it |
list[item] = 1; -- save it |
||
else -- here when this |
else -- here when this item already saved |
||
list[item] = list[item] + 1; -- to indicate that there are multiple |
list[item] = list[item] + 1; -- to indicate that there are multiple items |
||
end |
end |
||
end |
end |
||
Line 603: | Line 659: | ||
local function anchor_id_make_anchor (template, anchor_id_list) |
local function anchor_id_make_anchor (template, anchor_id_list) |
||
template = template:gsub ('^{{', ''):gsub ('}}$', '', 1); |
template = template:gsub ('^{{[^|]+|', ''):gsub ('}}$', '', 1); -- remove outer {{ and }} and template name |
||
template = template:gsub ('^[^|]+|', ''); -- remove template name and first pipe |
|||
template = wikilink_strip (template); -- strip any wikilink markup (there shouldn't be any but just in case) |
template = wikilink_strip (template); -- strip any wikilink markup (there shouldn't be any but just in case) |
||
Line 610: | Line 666: | ||
local anchor_id; |
local anchor_id; |
||
for param in template:gmatch ('%b{}') do -- loop through the template; remove and save templates (presumed to sfnref or harvid) |
for param in template:gmatch ('%b{}') do -- loop through the template; remove and save templates (presumed to be sfnref or harvid) |
||
table.insert (params, param); -- save it |
table.insert (params, param); -- save it |
||
template = template:gsub ('%b{}', '', 1); -- remove it from source template |
template = template:gsub ('%b{}', '', 1); -- remove it from source template |
||
Line 617: | Line 673: | ||
for _, t in ipairs (params) do -- spin through the templates in params |
for _, t in ipairs (params) do -- spin through the templates in params |
||
anchor_id = sfnref_get (t); -- attempt to decode {{sfnref}} and {{harvid}} |
anchor_id = sfnref_get (t); -- attempt to decode {{sfnref}} and {{harvid}} |
||
if anchor_id then -- nil when not {{sfnref}} |
if anchor_id then -- nil when not {{sfnref}} or {{harvid}} |
||
list_add (anchor_id, anchor_id_list, true); -- add anchor ID to the list |
list_add (anchor_id, anchor_id_list, true); -- add anchor ID to the list |
||
end |
end |
||
Line 636: | Line 692: | ||
--[[--------------------------< |
--[[--------------------------< T E M P L A T E _ L I S T _ A D D >-------------------------------------------- |
||
makes a list of |
makes a list of templates used in the article. |
||
]] |
|||
local function template_list_add (template, template_list) |
|||
local template = template:match ('{{%s*(.-)[|}]'); -- keep the case of the template - this is different from template_name_get() |
|||
if template and not template:match ('^#') then -- found a template or magic word; ignore magic words |
|||
template=mw.text.trim (template); -- trim whitespace |
|||
template = Lang_obj:ucfirst (template); -- first character in template name must be uppercase (same as canonical template name) TODO: better way to do this? |
|||
list_add (template, template_list); -- add to list with (unused) tally |
|||
end |
|||
end |
|||
--[[--------------------------< A N C H O R _ I D _ L I S T _ M A K E >---------------------------------------- |
|||
makes a list of anchor ids from cs1|2, cs1|2-like, vcite xxx, harvc, anchor, wikicite templates |
|||
Because cs1|2 wrapper templates can, and often do, hide |
Because cs1|2 wrapper templates can, and often do, hide the author and date parameters inside the wrapper, |
||
these parameters are not available in the article's wikisource so {{harv}}, {{sfn}}, and {{harvc}} templates that |
these parameters are not available in the article's wikisource so {{harv}}, {{sfn}}, and {{harvc}} templates that |
||
link correctly to those wrapper templates will incorrectly show error messages. Use |ignore-err=yes in the {{harv}}, |
link correctly to those wrapper templates will incorrectly show error messages. Use |ignore-err=yes in the {{harv}}, |
||
{{sfn}}, and {{harvc}} templates to suppress the error message. |
{{sfn}}, and {{harvc}} templates to suppress the error message. |
||
creates a list of templates used in the article for use with the whitelist |
|||
creates a list of article-local whitelisted anchor IDs from {{sfn whitelist}} |
|||
]] |
]] |
||
local function anchor_id_list_make () |
local function anchor_id_list_make () |
||
local anchor_id_list = {} |
|||
article_content_get (); -- attempt to get this article's content |
|||
local template_list = {} |
|||
local article_whitelist = {} |
|||
local article_content = article_content_get (); -- attempt to get this article's content |
|||
if |
if article_content == '' then -- when there is no article content |
||
return ''; -- no point in continuing |
return ''; -- no point in continuing |
||
end |
end |
||
local template; -- place to hold the template that we found |
local template; -- place to hold the template that we found |
||
local template_name; |
|||
local anchor_id; -- place to hold an anchor id as it is extracted / decoded |
local anchor_id; -- place to hold an anchor id as it is extracted / decoded |
||
local tstart, tend = Article_content:find ('{{%s*[Cc]it[ae]'); -- find the first cs1|2-like template |
|||
local find_pattern = '%f[{]{{[^{]'; |
|||
while tstart do -- nil when cs1|2 template not found |
|||
local tstart, tend = article_content:find (find_pattern); -- find the first template; do not find template variables: {{{template var|}}} |
|||
while tstart do |
|||
if template then -- necessary? |
|||
template = article_content:match ('%b{}', tstart); -- get the whole template |
|||
if not template then |
|||
list_add (anchor_id, anchor_id_list, true) |
|||
break; -- template is nil for some reason (last template missing closing }} for example) so declare ourselves done |
|||
end |
end |
||
tstart = tend; -- reset the search starting index |
|||
tstart, tend = Article_content:find ('{{%s*[Cc]it[ae]', tstart); -- search for another cs1|2 template |
|||
end |
|||
template_name = template_name_get (template); -- get first char uppercase trimmed template name; ignore subpages ~/new, ~/sandbox |
|||
for _, pattern in ipairs (redirect_patterns_harvc) do |
|||
template_list_add (template, template_list); -- add this template's name to the list |
|||
tstart, tend = Article_content:find (pattern); -- find the first harvc template |
|||
while tstart do -- nil when harvc template not found |
|||
template = Article_content:match ('%b{}', tstart); -- get the whole template |
|||
if template then -- necessary? |
|||
anchor_id = anchor_id_make_harvc (template); -- extract an anchor id from this template |
|||
list_add (anchor_id, anchor_id_list, true); |
|||
end |
|||
tstart = tend; -- reset the search starting index |
|||
tstart, tend = Article_content:find (pattern, tstart); -- search for another harvc template |
|||
end |
|||
end |
|||
if data.known_templates_cs12 [template_name] then |
|||
for _, pattern in ipairs (redirect_patterns_vcite) do -- for each of the vcite family template base patterns |
|||
anchor_id = anchor_id_make_cs12 (template); -- extract an anchor id from this template |
|||
list_add (anchor_id, anchor_id_list, true) |
|||
while tstart do -- nil when vcite template not found |
|||
template = Article_content:match ('%b{}', tstart); -- get the whole template |
|||
if template then -- necessary? |
|||
local ref = template:match ('|%s*ref%s*=%s*(%b{})'); -- first look for |ref={{sfnref}} or |ref={{harvid}} because we will strip templates from the vcite template |
|||
if ref then -- |ref={{template}} |
|||
anchor_id = sfnref_get (ref); -- returns content of {{sfnref}} or {{harvid}}; nil else |
|||
list_add (anchor_id, anchor_id_list, true); |
|||
else |
|||
local params = {}; |
|||
local template_name = template_name_get (template); -- get lowercase trimmed template name; ignore subpages ~/new, ~/sandbox |
|||
elseif data.known_templates_vcite [template_name] then |
|||
template_params_get (template, params); -- build a table of template parameters and their values |
|||
local ref = template:match ('|%s*ref%s*=%s*(%b{})'); -- first look for |ref={{sfnref}} or |ref={{harvid}} because we will strip templates from the vcite template |
|||
if ref then -- |ref={{template}} |
|||
anchor_id = sfnref_get (ref); -- returns content of {{sfnref}} or {{harvid}}; nil else |
|||
list_add (anchor_id, anchor_id_list, true); |
|||
else |
|||
local params = {}; |
|||
template_params_get (template, params); -- build a table of template parameters and their values |
|||
if not anchor_id and params['harvid'] then |
|||
anchor_id = params['ref']; -- when both set, vcite uses value from |ref= |
|||
if not anchor_id and params['harvid'] then |
|||
end |
|||
anchor_id = 'CITEREF' .. params['harvid']; -- in vcite, |harvid= auto-adds 'CITEREF' prefix to the value in |harvid= |
|||
list_add (anchor_id, anchor_id_list, true); |
|||
end |
end |
||
list_add (anchor_id, anchor_id_list, true); |
|||
end |
end |
||
elseif data.known_templates_harvc [template_name] then |
|||
tstart = tend; -- reset the search starting index |
|||
anchor_id = anchor_id_make_harvc (template); -- extract an anchor id from this template |
|||
list_add (anchor_id, anchor_id_list, true); |
|||
end |
|||
end |
|||
elseif data.known_templates_wikicite [template_name] then |
|||
tstart, tend = Article_content:find ('{{%s*[Ww]ikicite'); -- find the first {{wikicite}} template |
|||
while tstart do -- nil when cs1|2 template not found |
|||
template = Article_content:match ('%b{}', tstart); -- get the whole template |
|||
if template then |
|||
local ref = template:match ('|%s*ref%s*=%s*(%b{})'); -- first look for |ref={{sfnref}} or |ref={{harvid}} |
local ref = template:match ('|%s*ref%s*=%s*(%b{})'); -- first look for |ref={{sfnref}} or |ref={{harvid}} |
||
Line 729: | Line 784: | ||
elseif template:match ('|%s*id%s*=%s*(%b{})') then |
elseif template:match ('|%s*id%s*=%s*(%b{})') then |
||
ref = template:match ('|%s*id%s*=%s*(%b{})'); |
|||
anchor_id = 'Reference-' .. sfnref_get (ref); |
|||
elseif template:match ('|%s*id%s*=([^|}]+)') then |
elseif template:match ('|%s*id%s*=([^|}]+)') then |
||
anchor_id = 'Reference-' .. template:match ('|%s*id%s*=([^|}]+)'); -- plain-text |
anchor_id = 'Reference-' .. template:match ('|%s*id%s*=([^|}]+)'); -- plain-text |
||
Line 741: | Line 797: | ||
list_add (anchor_id, anchor_id_list, true); |
list_add (anchor_id, anchor_id_list, true); |
||
end |
end |
||
end |
|||
tstart = tend; -- reset the search starting index |
|||
tstart, tend = Article_content:find ('{{%s*[Ww]ikicite', tstart); -- search for another cs1|2 template |
|||
end |
|||
for _, pattern in ipairs (redirect_patterns_anchor) do |
|||
tstart, tend = Article_content:find (pattern); -- find the first anchor template |
|||
while tstart do -- nil when anchor template not found |
|||
template = Article_content:match ('%b{}', tstart); -- get the whole template |
|||
if template then -- necessary? |
|||
anchor_id_make_anchor (template, anchor_id_list); -- extract anchor ids from this template if any |
|||
end |
|||
tstart = tend; -- reset the search starting index |
|||
tstart, tend = Article_content:find (pattern, tstart); -- search for another anchor template |
|||
end |
|||
end |
|||
elseif data.known_templates_anchor [template_name] then |
|||
anchor_id_make_anchor (template, anchor_id_list); -- extract anchor ids from this template if any |
|||
elseif data.known_templates_sfn_whitelist [template_name] then |
|||
template = template:gsub ('^{{[^|]+|', ''):gsub ('}}$', '', 1); -- remove outer {{ and }} and template name |
|||
template = mw.text.trim (template, '%s|'); -- trim leading trailing white space and pipes |
|||
template = mw.text.split (template, '%s*|%s*'); -- make a table of the template's parameters |
|||
for _, anchor_id in ipairs (template) do -- spin through this template's parameter |
|||
mw.logObject (anchor_id_list, 'anchor_id_list') |
|||
if '' ~= anchor_id and not article_whitelist[anchor_id] then |
|||
return anchor_id_list; |
|||
anchor_id = mw.uri.anchorEncode (anchor_id) |
|||
end |
|||
article_whitelist[anchor_id] = 1; -- add to the whitelist |
|||
end |
|||
end |
|||
elseif template_name and whitelist.wrapper_templates[template_name] then |
|||
anchor_id = anchor_id_make_wrapper (template); -- extract an anchor id from this template if possible |
|||
list_add (anchor_id, anchor_id_list, true); |
|||
--[[--------------------------< T E M P L A T E _ L I S T _ M A K E >------------------------------------------ |
|||
elseif template_name and template_name:match ('^Cit[ea]') then -- not known, not known wrapper; last gasp, try as cs1-like |
|||
anchor_id = anchor_id_make_cs12 (template); -- extract an anchor id from this template if possible |
|||
list_add (anchor_id, anchor_id_list, true); |
|||
end |
|||
tstart, tend = article_content:find (find_pattern, tend); -- search for another template; begin at end of last search |
|||
makes a list of templates use in the article. |
|||
end |
|||
mw.logObject (anchor_id_list, 'anchor_id_list'); |
|||
]] |
|||
mw.logObject (template_list, 'template_list'); |
|||
mw.logObject (article_whitelist, 'article_whitelist'); |
|||
local Lang_obj = mw.language.getContentLanguage(); |
|||
global_anchor_id_list = anchor_id_list |
|||
local function template_list_make () |
|||
global_template_list = template_list |
|||
article_content_get (); -- attempt to get this article's content |
|||
global_article_whitelist = article_whitelist |
|||
if '' == Article_content then -- when there is no article content |
|||
return ''; -- no point in continuing |
|||
end |
|||
local template_list = {}; |
|||
for template in Article_content:gmatch ('{{%s*(.-)[|}]') do |
|||
if template and not template:match ('^#') then -- found a template or magic word; ignore magic words |
|||
template=mw.text.trim (template); -- trim whitespace |
|||
template = Lang_obj:ucfirst (template); -- first character in template name must be uppercase (same as canonical template name) |
|||
list_add (template, template_list); -- add to list with (unused) tally |
|||
end |
|||
end |
|||
mw.logObject (template_list, 'template_list') |
|||
return template_list; |
|||
end |
end |
||
--[[--------------------------< C I T E R E F _ P A T T E R N S _ M A K E >-------------------------------------------- |
|||
Scans template_list to look for wrapper templates that generate citerefs that require Lua patterns. |
|||
--[[--------------------------< A R T I C L E _ L O C A L _ W H I T E L I S T _ M A K E >---------------------- |
|||
This scan is only done once per page load, to save time |
|||
makes a list of templates use in the article. |
|||
]] |
]] |
||
local function |
local function citeref_patterns_make() |
||
if not global_template_list then return end |
|||
article_content_get (); -- attempt to get this article's content |
|||
local citeref_patterns = {} |
|||
local template_patterns = whitelist.wrapper_template_patterns |
|||
if '' == Article_content then -- when there is no article content |
|||
for _, p in ipairs(template_patterns) do |
|||
return ''; -- no point in continuing |
|||
for _, t in ipairs(p[1]) do -- loop through list of template wrappers |
|||
end |
|||
if global_template_list[t] then -- if wrapper is found in article, record corresponding patterns |
|||
for _, pat in ipairs(p[2]) do |
|||
local article_whitelist = {}; |
|||
table.insert(citeref_patterns, pat) |
|||
local tstart, tend; |
|||
end |
|||
local template; |
|||
break |
|||
end |
|||
for _, pattern in ipairs (redirect_patterns_sfn_whitelist) do |
|||
end |
|||
tstart, tend = Article_content:find (pattern); -- find the first whitelist template |
|||
end |
|||
mw.logObject(citeref_patterns,'citeref_patterns') |
|||
while tstart do -- nil when whitelist template not found |
|||
return citeref_patterns |
|||
template = Article_content:match ('%b{}', tstart); -- get the whole template |
|||
if template then -- necessary? |
|||
template = template:gsub (pattern, ''):gsub ('}}$', '', 1); -- remove outer {{ and }} and template name |
|||
template = mw.text.trim (template, '%s|'); -- trim leading trailing white space and pipes |
|||
template = mw.text.split (template, '%s*|%s*'); -- make a table of the template's parameters |
|||
for _, anchor_id in ipairs (template) do -- spin through this template's parameter |
|||
if '' ~= anchor_id and not article_whitelist[anchor_id] then |
|||
article_whitelist[anchor_id] = 1; -- add to the whitelist |
|||
end |
|||
end |
|||
end |
|||
tstart = tend; -- reset the search starting index |
|||
tstart, tend = Article_content:find (pattern, tstart); -- search for another whitelist template |
|||
end |
|||
end |
|||
mw.logObject (article_whitelist, 'article_whitelist') |
|||
return article_whitelist; |
|||
end |
end |
||
Line 843: | Line 866: | ||
]] |
]] |
||
-- First create global_anchor_id_list, global_template_list, global_article_whitelist |
|||
anchor_id_list_make() |
|||
-- Then stuff them (and derived tables) into return table |
|||
return { |
return { |
||
anchor_id_list = |
anchor_id_list = global_anchor_id_list or {}, -- table of anchor ids available in this article |
||
article_whitelist = |
article_whitelist = global_article_whitelist or {}, -- table of anchor ids with false-positive error message to be suppressed |
||
template_list = |
template_list = global_template_list or {}, -- table of templates used in this article |
||
citeref_patterns = citeref_patterns_make() or {} -- table of Lua patterns to search for citeref from wrappers |
|||
} |
} |
Latest revision as of 02:04, 14 January 2025
Documentation for this module may be created at Module:Footnotes/anchor id list/doc
require('strict');
local data = mw.loadData ('Module:Footnotes/anchor id list/data');
local whitelist = mw.loadData ('Module:Footnotes/whitelist');
local Lang_obj = mw.language.getContentLanguage(); -- used by template_list_add() to uppercase first letter of template name TODO: better way to do that?
-- Lower-case names of {{date}} and the templates treated as its redirects.  date_get() lower-cases the name of a
-- template found as the value of a date parameter and looks it up here; when found, the date is taken from that
-- template's first positional parameter.
local redirects_date = {
['date'] = true,
['datetomos'] = true,
['formatdate'] = true,
['isotodmymdy'] = true,
['isotomos'] = true,
}
-- Keys are compared against the first-letter-uppercase name returned by template_name_get().
local redirects_patent = { -- special case cs1-like templates because uses different parameters for name and date in anchor ID
['Cite patent'] = true,
['Citeref patent'] = true,
['Ref patent'] = true,
}
-- Lower-case template names that sfnref_get() accepts as {{sfnref}} / {{harvid}}.
local redirects_sfnref = {
['sfnref'] = true,
['harvid'] = true,
}
-- Name-parameter alias lists consumed by names_get(); within a list, earlier aliases win over later ones.
local aliases_author = { -- these use pseudo-patterns in the same way as cs1|2; '#' represents 1 or more enumerator digits
'last#',
'author#',
'surname#',
'author-last#',
'author#-last',
'subject#',
'host#',
}
-- Contributor names; anchor_id_make_cs12() and anchor_id_make_wrapper() try these before author and editor names.
local aliases_contributor = {
'contributor#',
'contributor-last#',
'contributor#-last',
'contributor-surname#',
'contributor#-surname',
}
-- Editor names; tried after author names and |vauthors=.
local aliases_editor = {
'editor#',
'editor-last#',
'editor#-last',
'editor-surname#',
'editor#-surname',
}
-- The only name parameters that anchor_id_make_harvc() reads from {{harvc}}.
local aliases_harvc_author = {
'last#',
'author#',
}
local aliases_inventor = { -- cite patent
'inventor#',
'inventor-last#',
'inventor#-last',
'inventor-surname#',
'inventor#-surname',
'invent#',
'invent-#',
}
-- Date-parameter patterns; date_get() tries the entries of whichever list it is given in the order listed here.
local alias_patterns_date = { -- normal lua patterns for most cs1|2-like templates
'|%s*year%s*=%s*',
'|%s*date%s*=%s*',
'|%s*publication%-?date%s*=%s*',
'|%s*air%-?date%s*=%s*',
}
local alias_patterns_harvc_date = { -- normal lua patterns for harvc template
'|%s*anchor%-year%s*=%s*',
'|%s*year%s*=%s*',
}
local alias_patterns_patent_date = { -- normal lua patterns for cite patent templates
'|%s*issue%-date%s*=%s*',
'|%s*gdate%s*=%s*',
'|%s*publication%-date%s*=%s*',
'|%s*pubdate%s*=%s*',
}
-- Recognized date formats; date_get() tries these in order against a date value and returns the year portion
-- captured by the first pattern that matches.
local patterns_date = { -- normal lua patterns
-- '(%d%d%d%d–%d%d%d%d%l?)$', -- YYYY–YYYY four-digit year range at end (Season YYYY–YYYY); with or without dab
'(%d%d%d%d)%D+(%d%d%d%d%l?)$', -- any range with four-digit years; with or without dab; note two captures
'^(%d%d%d%d–%d%d%l?)$', -- YYYY–YY two-digit year range; with or without dab
'^(c%. %d%d%d%d?%l?)$', -- three- or four-digit circa year; with or without dab
'(%d%d%d%d?%l?)$', -- three- or four-digit year at end of date (dmy or mdy); with or without dab
'^(%d%d%d%d?%l?)', -- three- or four-digit year at beginning of date (ymd or YYYY); with or without dab
'^(n%.d%.%l?)$', -- 'no date' with dots; with or without dab
'^(nd%l?)$', -- 'no date' without dots; with or without dab
}
-- Tags whose markup and content article_content_get() deletes from the article text before any scanning is done.
local patterns_tags = {
'<nowiki>.-</nowiki>',
'<!%-%-.-%-%->',
'<pre>.-</pre>',
'<syntaxhighlight.->.-</syntaxhighlight>',
'<source.->.-</source>', -- deprecated alias of syntaxhighlight tag
}
-- Keys are first-letter-uppercase template names as returned by template_name_get().
local template_skip = { -- templates to be skipped for whatever reason; mostly because they resemble cs1-like templates
['Citation-attribution'] = true,
}
local global_article_content = nil -- cache of the tag-stripped article text; set on the first call to article_content_get()
local global_anchor_id_list = nil -- exported tables
local global_template_list = nil
local global_article_whitelist = nil
--[[--------------------------< A R T I C L E _ C O N T E N T _ G E T >----------------------------------------

Returns the wikitext of the current page after deleting everything matched by the patterns in patterns_tags{}
(certain html-like tags together with their content).  Citation templates inside those tags are not valid
targets so they must not be seen by the code that scans for templates.

The stripped text is cached in global_article_content; the page is fetched and stripped only on the first call.

]]

local function article_content_get ()
	if not global_article_content then
		-- new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625
		local content = mw.title.getCurrentTitle():getContent() or '';

		for i = 1, #patterns_tags do
			content = content:gsub (patterns_tags[i], '');						-- drop the tag and whatever it wraps
		end

		global_article_content = content;										-- remember for subsequent calls
	end

	return global_article_content;
end
--[[--------------------------< S F N R E F _ G E T >----------------------------------------------------------

Builds an anchor id from the text of a {{sfnref}} or {{harvid}} template; the template is assumed to be correctly
formed.

Returns 'CITEREF' followed by the concatenation of up to five positional parameters (four names and a date);
parameters that contain an equal sign are ignored.  Returns nil when <template> is some other template.

]]

local function sfnref_get (template)
	local inner = template:gsub ('{{%s*(.-)%s*}}', '%1');						-- template text without the bounding braces, trimmed
	local parts = mw.text.split (inner, '%s*|%s*');							-- [1] is the template name; the rest are parameters

	if not redirects_sfnref[parts[1]:lower()] then
		return nil;																-- not {{sfnref}} or {{harvid}}
	end

	local pieces = {'CITEREF'};
	for i = 2, #parts do
		if 6 <= #pieces then													-- 'CITEREF' plus five positional parameters is the maximum
			break;
		end
		if not parts[i]:find ('=') then										-- named parameters are not supported by these templates; skip
			pieces[#pieces + 1] = parts[i];
		end
	end

	return table.concat (pieces);
end
--[[--------------------------< D A T E _ G E T >--------------------------------------------------------------

Extracts the year portion of a date from <template>.  <aliases> is a sequence of lua patterns, each of which
matches '|<date parameter name>='; the patterns are tried in sequence order.  No error checking of the date is
done (that is left to the cs1|2 templates).

The parameter value may be plain text or a {{date}} template (or one of the redirects listed in redirects_date{}),
in which case the date is the first positional parameter of that template.

Returns:
	the year (with disambiguator when present), or 'YYYY–YYYY' for a range of four-digit years;
	empty string (so that the result can always be concatenated) when no date parameter has a value, when the
	value is a template other than {{date}}, or when the value does not match any pattern in patterns_date{}

<template> is converted with tostring() once, on entry, so that every match below operates on a string.

]]

local function date_get (template, aliases)
	local text = tostring (template);

	for _, alias in ipairs (aliases) do										-- spin through the date parameter patterns
		if text:match (alias) then												-- is this |<date alias>= present?
			local value = text:match (alias .. '(%b{})');						-- is the assigned value a template?

			if value then
				value = value:gsub ('{{%s*(.-)%s*}}', '%1');					-- strip bounding template markup and trim
				local parts = mw.text.split (value, '%s*|%s*');				-- [1] is the template name
				if not redirects_date[parts[1]:lower()] then
					return '';													-- some template other than {{date}} or a redirect
				end
				value = parts[2];												-- first positional parameter is the date; may be nil
			else
				value = text:match (alias .. '([^|}]+)');						-- plain-text value
				if value then
					value = mw.text.trim (value);
					if '' == value then
						value = nil;											-- only whitespace; unset so that the next alias is tried
					end
				end
			end

			if value then
				for _, date_pattern in ipairs (patterns_date) do				-- spin through the recognized date formats
					local year, year2 = value:match (date_pattern);			-- <year2> only for ranges of four-digit years
					if year then
						if year2 then
							return year .. '–' .. year2;						-- rebuild the range with an en dash
						end
						return year;
					end
				end
				break;															-- a date was found but is malformed; abandon
			end
		end
	end

	return '';																	-- nothing usable
end
--[[--------------------------< V N A M E S _ G E T >----------------------------------------------------------

Extracts names from |vauthors= or |veditors= (<vparam> is the parameter name without pipe or equal sign); there
is no |vcontributors= parameter.

The parameter value is split at the commas.  A name wrapped in accept-as-written markup may itself contain commas,
((Black, Brown, White, and Co.)), so the segments of such a name are rejoined into one name.  At most the first
four names are used.  The limit is applied to the number of names collected, not to the number of comma-separated
segments consumed, so a multi-segment accept-as-written name counts as one name.

For names without accept-as-written markup only the surname is kept (trailing whitespace followed by uppercase
initials is removed); for names with the markup the markup is removed and the whole name is kept.

Returns the concatenation of the names; nil when <params> does not have <vparam>.

]]

local function vnames_get (params, vparam)
	local vnames = {};															-- first four author or editor names go here

	if params[vparam] then														-- |vauthors= or |veditors= present?
		local split = mw.text.split (params[vparam], '%s*,%s*');				-- this also separates the portions of ((Black, Brown, White, and Co.))

		local i = 1;															-- indexer into split{}
		while split[i] and 4 > #vnames do										-- limit to four names
			if split[i]:match ('^%(%(.*[^%)][^%)]$') then						-- first segment of a comma-separated accept-as-written name; has the opening doubled parens only
				local name = split[i];
				i = i + 1;														-- bump indexer to next segment
				while split[i] do
					name = name .. ', ' .. split[i];							-- concatenate with previous segments
					if split[i]:match ('^.*%)%)$') then							-- this segment has the closing doubled parens
						break;													-- done reassembling
					end
					i = i + 1;
				end
				table.insert (vnames, name);									-- one name, however many segments it spanned
			else
				table.insert (vnames, split[i]);
			end
			i = i + 1;
		end

		for n, vname in ipairs (vnames) do
			if vname:match ('%(%(.-%)%)') then
				vnames[n] = (vname:gsub ('%(%((.-)%)%)', '%1'));				-- accept-as-written: remove the markup, keep the whole name
			else
				vnames[n] = (vname:gsub ('(.-)%s+%u+$', '%1'));				-- otherwise keep the surname(s) only
			end
		end
	end

	return 0 ~= #vnames and table.concat (vnames) or nil;						-- concatenation of the names; nil else
end
--[[--------------------------< N A M E S _ G E T >------------------------------------------------------------

Collects the first four names from <params> using the parameter-name aliases in <aliases_list>.  In each alias,
'#' stands for the enumerator.  For every enumerator 1-4 the aliases are tried in list order and the first one
that has a value in <params> supplies the name.  For enumerator 1 each alias is tried with the digit (|last1=)
and then, because they are exact aliases, without it (|last=).

A name that is not present for an enumerator contributes an empty string.  Accept-as-written markup, ((name)),
is removed from each name.

Returns the names concatenated in enumerator order; nil when the concatenation is an empty string.

]]

local function names_get (params, aliases_list)
	local found = {};															-- found[n] is the name for enumerator n, if any

	for n = 1, 4 do
		for _, alias in ipairs (aliases_list) do
			local value = params[(alias:gsub ('#', n))];						-- |lastn=
			if not value and 1 == n then
				value = params[(alias:gsub ('#', ''))];						-- |last= when |last1= is not set
			end
			if value then
				found[n] = value;
				break;															-- done with this enumerator
			end
		end
	end

	local pieces = {};
	for n = 1, 4 do
		pieces[n] = ((found[n] or ''):gsub ('%(%((.-)%)%)', '%1'));			-- missing name -> empty string; remove accept-as-written markup
	end

	local joined = table.concat (pieces);
	if '' == joined then
		return nil;
	end
	return joined;
end
--[[--------------------------< T E M P L A T E _ S T R I P >--------------------------------------------------

Removes the outer {{ and }} (and the whitespace adjacent to them) from a citation or harvc template, then removes,
in whole, every template that remains inside it.

Templates are not allowed in parameters that become part of COinS metadata, yet they do appear.  cs1|2 sees the
rendered html of such a template, not its markup, and strips the html to get the displayed value.  That cannot be
done here so inner templates are simply discarded.  This can leave a |lastn= parameter empty; it is then treated
as empty, just as cs1|2 does (three authors with |last2= empty -> CITEREFLast1Last3YYYY).

Returns the stripped text.

]]

local function template_strip (template)
	local inner = template:gsub ('^{{%s*', '');								-- opening delimiter and the whitespace that follows it
	inner = inner:gsub ('%s*}}$', '', 1);										-- closing delimiter and the whitespace that precedes it
	return (inner:gsub ('%b{}', ''));											-- whatever templates are left are nested ones; discard them
end
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

Returns <argument> with a '%' inserted in front of every lua magic character:
	% ^ $ ( ) . [ ] * + - ?
Needed because string.gsub() and friends interpret their pattern and replacement arguments; they are not taken
as literal strings.

]]

local function escape_lua_magic_chars (argument)
	return (argument:gsub ('[%%%^%$%(%)%.%[%]%*%+%-%?]', '%%%0'));				-- %0 is the matched magic character
end
--[=[-------------------------< W I K I L I N K _ S T R I P >--------------------------------------------------

Replaces every wikilink in <template> with its displayed text:
	[[link|label]] -> label
	[[link]] -> link
Wikilink markup does not belong in an anchor id and the pipe in a piped wikilink confuses the code that takes
citation and harvc templates apart.

Each wikilink is first swapped for a marker; the marker is then swapped for the text.  The text is escaped
because string.gsub() interprets its replacement argument.

Returns the modified template text.

]=]

local function wikilink_strip (template)
	local marker = '__57r1P__';
	local wikilink_pattern = '%[%b[]%]';

	for link in template:gmatch (wikilink_pattern) do							-- iterates over the wikilinks of the text as it was on entry
		template = template:gsub (wikilink_pattern, marker, 1);				-- first remaining wikilink becomes the marker

		local text = link:match ('%[%[.-|(.-)%]%]')								-- label of a piped wikilink ...
			or link:match ('%[%[(.-)%]%]');										-- ... else the link itself

		template = template:gsub (marker, escape_lua_magic_chars (text), 1);	-- marker becomes the displayed text
	end

	return template;
end
--[[--------------------------< T E M P L A T E _ N A M E _ G E T >--------------------------------------------

Returns the name of the template whose text is <template>.  Leading and trailing whitespace is removed and the
first character is uppercased with Lang_obj:ucfirst() so that the name has the same form as a canonical template
name.  The name ends at the first slash so the subpage portion of a sandbox is not included:
	{{cite book/new |...}} returns Cite book

Returns nil when no name can be extracted (possibly corrupt template; an extraneous opening { mid-template can
cause this) or when the name begins with '#' (parser functions and magic words are not templates).

]]

local function template_name_get (template)
	local name = template:match ('^{{%s*([^/|}]+)');							-- leading whitespace is not captured

	if name and not name:match ('^#') then
		return Lang_obj:ucfirst ((name:gsub ('%s*$', '')));					-- trim trailing whitespace, uppercase first character
	end

	return nil;
end
--[[--------------------------< T E M P L A T E _ P A R A M S _ G E T >----------------------------------------

Takes a template apart and stores its named parameters in <params_t>: key is the parameter name, value is the
trimmed parameter value.  Positional parameters and parameters without a value are not stored.  No return value.

Wikilinks are reduced to their displayed text and nested templates are removed before the template is split at
the pipes, because both confuse the split and neither is allowed in an anchor id.  Callers that need
|ref={{sfnref}} or |ref={{harvid}} must extract it themselves before the templates are stripped here.

]]

local function template_params_get (template, params_t)
	local stripped = template_strip (wikilink_strip (template));

	for _, segment in ipairs (mw.text.split (stripped, '%s*|%s*')) do			-- one segment per parameter
		if segment:find ('=', 1, true) then									-- named parameters only
			local key, value = segment:match ('%s*([^=]-)%s*=%s*([^|}]+)');
			if value and '' ~= value and not value:match ('^%s$') then			-- must have a value that is not just whitespace
				params_t[key] = mw.text.trim (value);
			end
		end
	end
end
--[[--------------------------< A N C H O R _ I D _ M A K E _ H A R V C >--------------------------------------

Makes an anchor id from a {{harvc}} template or one of its redirects.  In order of preference:
	|id={{sfnref}} or |id={{harvid}} - the anchor id assembled by sfnref_get(); nil when |id= holds some other template
	|id=<text> - the text as written
	otherwise - 'CITEREF' + names from |last#= / |author#= + year from |anchor-year= or |year=

Returns nil when the template has neither |id= nor names.

]]

local function anchor_id_make_harvc (template)
	local date = date_get (template, alias_patterns_harvc_date);				-- from the raw template because the date may be in {{date}}; empty string when none
	local id_template = template:match ('|%s*id%s*=%s*(%b{})');				-- from the raw template because nested templates get stripped next

	local params = {};
	template_params_get (template, params);									-- strips wikilinks and templates

	if id_template then
		return sfnref_get (id_template);
	end

	if params.id then
		return params.id;														-- custom anchor text
	end

	local names = names_get (params, aliases_harvc_author);
	return names and ('CITEREF' .. names .. date) or nil;
end
--[[--------------------------< A N C H O R _ I D _ M A K E _ W R A P P E R >----------------------------------

Makes an anchor id for a wrapper template, that is, a template listed in whitelist.wrapper_templates{}.

The externally visible |ref= decides what is done:
	|ref= - empty or missing: names and date come from the template's parameters; when the template does not supply
		them, the defaults from the whitelist are used
	|ref=harv - same as empty or missing
	|ref={{SfnRef|name|name|name|name|year}} - assemble an anchor id from {{sfnref}} positional parameters
	|ref={{Harvid|name|name|name|name|year}} - assemble an anchor id from {{harvid}} positional parameters
	|ref=none - skip; the anchor id is intentionally suppressed
	|ref=<text> - use the text; it may match an anchor id override in a {{harv}} template |ref= parameter or a
		{{harvc}} |id= parameter

Whitelist defaults are either a simple {names, date} pair or a table of such pairs keyed by volume ('default' when
the template has no |volume= or the volume is not listed).  When the template has |fascicle=, the key
'<volume>/<fascicle>' is preferred over '<volume>' when it exists.

A whitelist entry, 'default' volume, or default date that is missing is treated as 'no default' instead of
raising a script error.

Returns the anchor id; nil when one cannot be made or is suppressed.

]]

local function anchor_id_make_wrapper (template)
	local template_name = template_name_get (template);						-- first character uppercase, trimmed; no subpage
	if not template_name or template_skip[template_name] then
		return nil;																-- no usable name (possibly corrupt template) or a template to be skipped
	end

	local date = date_get (template, alias_patterns_date);						-- from the raw template because the date may be in {{date}}

	local ref = template:match ('|%s*ref%s*=%s*(%b{})');						-- |ref={{template}}; from the raw template because templates get stripped next
	if not ref then
		ref = template:match ('|%s*ref%s*=([^|}]+)');							-- |ref=<text>; whitespace counts as text here
		if ref then
			ref = mw.text.trim (ref);
			if '' == ref then
				ref = nil;														-- empty |ref= is the same as missing |ref=
			end
		end
	end

	local params = {};
	template_params_get (template, params);

	local wrap_data = whitelist.wrapper_templates[template_name] or {};
	local defaults;																-- {default names, default date}
	if wrap_data[1] then														-- simple-default wrapper
		defaults = wrap_data;
	else																		-- defaults depend on volume
		local vol = params['volume'] or 'default';
		local fascicle = params['fascicle'];									-- some templates use "fascicle" to mean "subvolume"
		if fascicle and wrap_data[vol .. '/' .. fascicle] then
			vol = vol .. '/' .. fascicle;										-- subvolume listed; prefer it over the volume
		end
		defaults = wrap_data[vol] or wrap_data['default'] or {};				-- unlisted volume falls back to the default volume
	end

	if 'harv' == ref or not ref then
		local names = names_get (params, aliases_contributor) or				-- contributor, author, or editor names from the template ...
			names_get (params, aliases_author) or
			vnames_get (params, 'vauthors') or
			names_get (params, aliases_editor) or
			vnames_get (params, 'veditors') or
			defaults[1];														-- ... else default names from the whitelist

		if '' == date then														-- template does not supply a date
			date = defaults[2] or '';											-- default date from the whitelist; empty string keeps the concatenation safe
		end

		if names then
			return 'CITEREF' .. names .. date;
		end
		return nil;
	end

	if ref:match ('%b{}') then													-- |ref={{template}}
		return (sfnref_get (ref));												-- nil when not {{sfnref}} or {{harvid}}
	end

	if 'none' == ref then
		return nil;																-- anchor id explicitly suppressed
	end

	return ref;																	-- |ref=<text>
end
--[[--------------------------< A N C H O R _ I D _ M A K E _ C S 1 2 >----------------------------------------

Makes an anchor id for a cs1|2 template or a cs1-like template.

|ref= decides what is done:
	|ref= - empty or missing: names and date come from the template's parameters; all cs1|2 create CITEREF anchor ids
	|ref=harv - same as empty or missing
	|ref={{SfnRef|name|name|name|name|year}} - assemble an anchor id from {{sfnref}} positional parameters
	|ref={{Harvid|name|name|name|name|year}} - assemble an anchor id from {{harvid}} positional parameters
	|ref=none - skip; the anchor id is intentionally suppressed (not supported by cite patent, where 'none' is
		taken as text)
	|ref=<text> - use the text; it may match an anchor id override in a {{harv}} template |ref= parameter or a
		{{harvc}} |id= parameter

{{cite patent}} and its redirects are special: names come from the inventor parameters only and the date comes
from the patent date parameters.

Returns the anchor id; nil when one cannot be made or is suppressed.

]]

local function anchor_id_make_cs12 (template)
	local template_name = template_name_get (template);						-- first character uppercase, trimmed; no subpage
	if not template_name or template_skip[template_name] then
		return nil;																-- no usable name (possibly corrupt template) or a template to be skipped
	end

	local is_patent = redirects_patent[template_name];
	local date = date_get (template, is_patent and alias_patterns_patent_date or alias_patterns_date);	-- from the raw template because the date may be in {{date}}

	local ref = template:match ('|%s*ref%s*=%s*(%b{})');						-- |ref={{template}}; from the raw template because templates get stripped next
	if not ref then
		ref = template:match ('|%s*ref%s*=([^|}]+)');							-- |ref=<text>; whitespace counts as text here
		if ref then
			ref = mw.text.trim (ref);
			if '' == ref then
				ref = nil;														-- empty |ref= is the same as missing |ref=
			end
		end
	end

	local params = {};
	template_params_get (template, params);

	if not ref or 'harv' == ref then
		local names;
		if is_patent then
			names = names_get (params, aliases_inventor);						-- inventor names only
		else
			names = names_get (params, aliases_contributor) or					-- contributor, author, or editor names
				names_get (params, aliases_author) or
				vnames_get (params, 'vauthors') or
				names_get (params, aliases_editor) or
				vnames_get (params, 'veditors');
		end

		if names then
			return 'CITEREF' .. names .. date;
		end
		return nil;
	end

	if ref:match ('%b{}') then													-- |ref={{template}}
		return (sfnref_get (ref));												-- nil when not {{sfnref}} or {{harvid}}
	end

	if 'none' == ref and not is_patent then
		return nil;																-- anchor id explicitly suppressed
	end

	return ref;																	-- |ref=<text>
end
--[[--------------------------< L I S T _ A D D >--------------------------------------------------------------

Tallies <item> in the <list> table: list[item] is set to 1 the first time <item> is seen and incremented every
time it is seen again, so a value greater than 1 means that <item> occurred more than once.

When <encode> is true (required for anchor IDs), <item> is passed through mw.uri.anchorEncode() before it is
used as the key.  A nil (or false) <item> is ignored.  No return value.

]]

local function list_add (item, list, encode)
	if not item then
		return;																	-- nothing to add
	end

	if encode then																-- for anchor IDs ...
		item = mw.uri.anchorEncode (item);										-- ... encode to remove wikimarkup, convert spaces to underscores etc
	end

	list[item] = (list[item] or 0) + 1;										-- first sighting gives 1; later sightings bump the tally
end
--[[--------------------------< A N C H O R _ I D _ M A K E _ A N C H O R >------------------------------------

Extracts anchor IDs from one {{anchor}} template and adds each of them to <anchor_id_list>.  There may be more
than one because {{anchor}} is not limited in the number of anchors that it may hold.

Templates embedded in the {{anchor}} parameters are handed to sfnref_get(); those that decode (as {{sfnref}} or
{{harvid}}) contribute an anchor ID, the others are dropped.  Every remaining non-empty positional value is
added as a plain-text anchor ID.  No return value.

]]

local function anchor_id_make_anchor (template, anchor_id_list)
	local content = template:gsub ('^{{[^|]+|', ''):gsub ('}}$', '', 1);		-- remove outer {{ and }} and template name
	content = wikilink_strip (content);											-- strip any wikilink markup (there shouldn't be any but just in case)

	local embedded = {};														-- templates found among the parameters (presumed to be sfnref or harvid)
	content = content:gsub ('%b{}', function (inner)							-- one pass: remember each embedded template ...
		embedded[#embedded + 1] = inner;
		return '';																-- ... and remove it from the parameter text
	end);

	for _, inner in ipairs (embedded) do										-- spin through the embedded templates
		local decoded = sfnref_get (inner);										-- attempt to decode {{sfnref}} and {{harvid}}; nil when something else
		if decoded then
			list_add (decoded, anchor_id_list, true);							-- add anchor ID to the list
		end
	end

	content = content:gsub ('|%s*|', '|');										-- when pipe follows pipe with or without white space, remove extraneous pipe
	content = content:gsub ('^|', ''):gsub ('|$', '');							-- remove extraneous leading and trailing pipes

	for _, piece in ipairs (mw.text.split (content, '%s*|%s*')) do				-- split at the pipes; spin through the plain-text anchor IDs
		local plain = mw.text.trim (piece);										-- trim white space
		if '' ~= plain then														-- should always have something
			list_add (plain, anchor_id_list, true);								-- add anchor ID to the list
		end
	end
end
--[[--------------------------< T E M P L A T E _ L I S T _ A D D >--------------------------------------------

Records the name of <template> in <template_list>, the list of templates used in the article.

The name is the text between the opening {{ and the first pipe or closing brace, trimmed, with its first
character uppercased; the case of the rest of the name is kept (this is different from template_name_get()).
Names that begin with '#' are ignored.  No return value.

]]

local function template_list_add (template, template_list)
	local name = template:match ('{{%s*(.-)[|}]');								-- raw name; case is kept
	if not name or name:match ('^#') then										-- nothing found, or name begins with '#' (parser function)
		return;
	end

	local trimmed = mw.text.trim (name);										-- trim whitespace
	local canonical = Lang_obj:ucfirst (trimmed);								-- first character in template name must be uppercase (same as canonical template name) TODO: better way to do this?
	list_add (canonical, template_list);										-- add to list with (unused) tally
end
--[[--------------------------< A N C H O R _ I D _ L I S T _ M A K E >----------------------------------------

Scans the article's wikitext for templates and builds three tables:
	anchor_id_list – tally of anchor ids made from cs1|2, cs1|2-like, vcite xxx, harvc, anchor, wikicite, and
		whitelisted wrapper templates
	template_list – tally of the names of all templates used in the article, for use with the whitelist
	article_whitelist – article-local whitelisted anchor IDs from {{sfn whitelist}}

The tables are logged with mw.logObject() and stored in global_anchor_id_list, global_template_list, and
global_article_whitelist (declared outside of this function).  When the article content is the empty string,
returns '' without storing anything; otherwise there is no return value.

Because cs1|2 wrapper templates can, and often do, hide the author and date parameters inside the wrapper,
these parameters are not available in the article's wikisource so {{harv}}, {{sfn}}, and {{harvc}} templates that
link correctly to those wrapper templates will incorrectly show error messages.  Use |ignore-err=yes in the {{harv}},
{{sfn}}, and {{harvc}} templates to suppress the error message.

A template whose braces are not balanced (missing closing }} for example) is skipped; scanning continues with
the next template so that well-formed templates that follow it are still processed, each exactly once.

]]

local function anchor_id_list_make ()
	local anchor_id_list = {};
	local template_list = {};
	local article_whitelist = {};
	local article_content = article_content_get ();							-- attempt to get this article's content

	if article_content == '' then												-- when there is no article content
		return '';																-- no point in continuing
	end

	local find_pattern = '%f[{]{{[^{]';										-- start of a template; does not find template variables: {{{template var|}}}
	local tstart, tend = article_content:find (find_pattern);					-- find the first template

	while tstart do
		-- The ^ anchors the match at <tstart>.  Without it, an unbalanced template at <tstart> lets the match slide
		-- forward to some later balanced template which would then be processed twice: once here and once more when
		-- the scan reaches it, leaving a bogus tally of 2 for its anchor id.
		local template = article_content:match ('^%b{}', tstart);				-- get the whole template; nil when braces at <tstart> are not balanced

		if template then
			local template_name = template_name_get (template);					-- get first char uppercase trimmed template name; ignore subpages ~/new, ~/sandbox
			local anchor_id;													-- place to hold an anchor id as it is extracted / decoded

			template_list_add (template, template_list);						-- add this template's name to the list

			if data.known_templates_cs12 [template_name] then
				anchor_id = anchor_id_make_cs12 (template);						-- extract an anchor id from this template
				list_add (anchor_id, anchor_id_list, true);

			elseif data.known_templates_vcite [template_name] then
				local ref = template:match ('|%s*ref%s*=%s*(%b{})');			-- first look for |ref={{sfnref}} or |ref={{harvid}} because we will strip templates from the vcite template
				if ref then														-- |ref={{template}}
					anchor_id = sfnref_get (ref);								-- returns content of {{sfnref}} or {{harvid}}; nil else
				else
					local params = {};
					template_params_get (template, params);						-- build a table of template parameters and their values
					anchor_id = params['ref'];									-- when both set, vcite uses value from |ref=
					if not anchor_id and params['harvid'] then
						anchor_id = 'CITEREF' .. params['harvid'];				-- in vcite, |harvid= auto-adds 'CITEREF' prefix to the value in |harvid=
					end
				end
				list_add (anchor_id, anchor_id_list, true);

			elseif data.known_templates_harvc [template_name] then
				anchor_id = anchor_id_make_harvc (template);					-- extract an anchor id from this template
				list_add (anchor_id, anchor_id_list, true);

			elseif data.known_templates_wikicite [template_name] then
				local ref = template:match ('|%s*ref%s*=%s*(%b{})');			-- first look for |ref={{sfnref}} or |ref={{harvid}}
				if ref then
					anchor_id = sfnref_get (ref);
				elseif template:match ('|%s*ref%s*=([^|}]+)') then
					anchor_id = template:match ('|%s*ref%s*=([^|}]+)');			-- plain-text
				elseif template:match ('|%s*id%s*=%s*(%b{})') then
					ref = template:match ('|%s*id%s*=%s*(%b{})');
					local id = sfnref_get (ref);								-- nil when the template in |id= is not {{sfnref}} or {{harvid}}
					anchor_id = id and ('Reference-' .. id) or nil;				-- guard: concatenating nil would throw a script error
				elseif template:match ('|%s*id%s*=([^|}]+)') then
					anchor_id = 'Reference-' .. template:match ('|%s*id%s*=([^|}]+)');	-- plain-text
				else
					anchor_id = nil;											-- no matches, ensure that anchor_id has no value
				end

				if anchor_id then
					list_add (anchor_id, anchor_id_list, true);
				end

			elseif data.known_templates_anchor [template_name] then
				anchor_id_make_anchor (template, anchor_id_list);				-- extract anchor ids from this template if any

			elseif data.known_templates_sfn_whitelist [template_name] then
				local params = template:gsub ('^{{[^|]+|', ''):gsub ('}}$', '', 1);	-- remove outer {{ and }} and template name
				params = mw.text.trim (params, '%s|');							-- trim leading trailing white space and pipes
				for _, id in ipairs (mw.text.split (params, '%s*|%s*')) do		-- spin through this template's parameters
					if '' ~= id and not article_whitelist[id] then
						article_whitelist[mw.uri.anchorEncode (id)] = 1;		-- add the encoded anchor id to the whitelist
					end
				end

			elseif template_name and whitelist.wrapper_templates[template_name] then
				anchor_id = anchor_id_make_wrapper (template);					-- extract an anchor id from this template if possible
				list_add (anchor_id, anchor_id_list, true);

			elseif template_name and template_name:match ('^Cit[ea]') then		-- not known, not known wrapper; last gasp, try as cs1-like
				anchor_id = anchor_id_make_cs12 (template);						-- extract an anchor id from this template if possible
				list_add (anchor_id, anchor_id_list, true);
			end
		end

		tstart, tend = article_content:find (find_pattern, tend);				-- search for another template; begin at end of last search (so nested templates are found too)
	end

	mw.logObject (anchor_id_list, 'anchor_id_list');
	mw.logObject (template_list, 'template_list');
	mw.logObject (article_whitelist, 'article_whitelist');

	global_anchor_id_list = anchor_id_list
	global_template_list = template_list
	global_article_whitelist = article_whitelist
end
--[[--------------------------< C I T E R E F _ P A T T E R N S _ M A K E >--------------------------------------------

Collects the Lua patterns needed to recognize citerefs generated by wrapper templates that are used in the
article.

Each entry in whitelist.wrapper_template_patterns is a pair: [1] a sequence of wrapper template names and [2] a
sequence of patterns.  When at least one of the names in [1] is present in global_template_list, all of the
patterns in [2] are appended (once) to the result.

Returns the sequence of collected patterns (also logged with mw.logObject()); returns nothing when
global_template_list is not set.

]]

local function citeref_patterns_make()
	if not global_template_list then
		return;																	-- no template list, nothing to scan
	end

	local collected = {};

	for _, entry in ipairs (whitelist.wrapper_template_patterns) do
		local in_use = false;
		for _, wrapper_name in ipairs (entry[1]) do								-- loop through list of template wrappers
			if global_template_list[wrapper_name] then							-- one hit is enough
				in_use = true;
				break;
			end
		end

		if in_use then															-- wrapper is found in article, record corresponding patterns
			for _, pattern in ipairs (entry[2]) do
				collected[#collected + 1] = pattern;
			end
		end
	end

	mw.logObject (collected, 'citeref_patterns');
	return collected;
end
--[[--------------------------< E X P O R T E D _ T A B L E S >------------------------------------------------

Runs the article scan and then exports its results.  Each exported field falls back to an empty table when the
corresponding result is not available.

]]

anchor_id_list_make ();														-- fills global_anchor_id_list, global_template_list, global_article_whitelist
local citeref_patterns = citeref_patterns_make ();								-- derived from global_template_list so must come after the scan

return {
	anchor_id_list = global_anchor_id_list or {},								-- table of anchor ids available in this article
	article_whitelist = global_article_whitelist or {},							-- table of anchor ids with false-positive error message to be suppressed
	template_list = global_template_list or {},									-- table of templates used in this article
	citeref_patterns = citeref_patterns or {},									-- table of Lua patterns to search for citeref from wrappers
}