Module:Footnotes/anchor id list: Difference between revisions

From All Skies Encyclopaedia
imported>Trappist the monk
No edit summary
imported>Trappist the monk
No edit summary
Line 292: Line 292:
local article_content = mw.title.getCurrentTitle():getContent() or ''; -- get the content of the article or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625
local article_content = mw.title.getCurrentTitle():getContent() or ''; -- get the content of the article or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625
article_content = article_content:gsub ('<nowiki>%s*{{%s*[Cc]it[ea].-}}%s*</nowiki>', ''); -- remove cite templates inside nowiki tags
article_content = article_content:gsub ('<nowiki>%s*{{%s*[Cc]it[ea].-}}%s*</nowiki>', ''); -- remove cite templates inside nowiki tags
article_content = article_content:gsub ('<!%-%-.+%-%->', ''); -- remove html comments and their content
article_content = article_content:gsub ('<!%-%-.-%-%->', ''); -- remove html comments and their content
if '' == article_content then -- when there is no article content
if '' == article_content then -- when there is no article content

Revision as of 14:04, 2 March 2020

Documentation for this module may be created at Module:Footnotes/anchor id list/doc

require('Module:No globals');

local citerefs = {};															-- anchor ids found in the article; filled in by harv_link_test()

local redirects_citation = {													-- lowercase names of {{citation}} and its redirects (cs2 templates)
	citation = true,
	cite = true,
	["cite citation"] = true,
	["cite study"] = true,
	["cite technical standard"] = true,
	}

local redirects_sfnref = {														-- lowercase names of {{sfnref}} and its redirect
	sfnref = true,
	harvid = true,
	}

-- the three name-alias lists use pseudo-patterns in the same way as cs1|2;
-- '#' is replaced by an enumerator digit (or by nothing for the first name) in names_get()
local aliases_contributor = {
	"contributor#",
	"contributor-last#",
	"contributor#-last",
	"contributor-surname#",
	"contributor#-surname",
	}

local aliases_author = {
	"last#",
	"author#",
	"surname#",
	"author-last#",
	"author#-last",
	"subject#",
	"host#",
	}

local aliases_editor = {
	"editor#",
	"editor-last#",
	"editor#-last",
	"editor-surname#",
	"editor#-surname",
	}

local aliases_date = {															-- date-holding parameters in the order that date_get() consults them
	"year",
	"date",
	"publicationdate",
	"publication-date",
	}
local patterns_date = {															-- normal lua patterns; tried in this order, first match wins
	'^(%d%d%d%d–%d%d%d%d%l?)$',													-- YYYY–YYYY four-digit year range; with or without dab
	'^(%d%d%d%d–%d%d%l?)$',														-- YYYY–YY two-digit year range; with or without dab
	'^(c%. %d%d%d%d?%l?)$',														-- three- or four-digit circa year; with or without dab; must precede the unanchored trailing-year pattern which would otherwise match first and drop the 'c. ' prefix
	'(%d%d%d%d?%l?)$',															-- three- or four-digit year at end of date (dmy or mdy); with or without dab
	'^(%d%d%d%d?%l?)',															-- three- or four-digit year at start of date (ymd or YYYY); with or without dab
	'^(n%.d%.%l?)$',															-- 'no date' with dots; with or without dab
	'^(nd%l?)$',																-- 'no date' without dots; with or without dab
	}


--[[--------------------------< S F N R E F _ G E T >----------------------------------------------------------

Build the CITEREF text from the positional parameters of an {{sfnref}} or {{harvid}} template.

The template is assumed to be correctly formed: everything after the template name is concatenated, in order,
with no separator.

returns the concatenated parameters; nil when the template name is not listed in redirects_sfnref

]]

local function sfnref_get (template)
	local inner = template:gsub ('{{%s*(.-)%s*}}', '%1');						-- drop the surrounding braces and the white space just inside them
	local fields = mw.text.split (inner, '%s*|%s*');							-- fields[1] is the template name; the rest are its parameters
	local name = fields[1]:lower();

	if not redirects_sfnref[name] then											-- some other template; nothing that we can use
		return nil;
	end

	return table.concat (fields, '', 2);										-- everything after the name, joined without a separator
end


--[[--------------------------< D A T E _ G E T >--------------------------------------------------------------

Extract the year portion from the first of |year=, |date=, |publicationdate=, or |publication-date= (the order
of aliases_date) whose value matches one of the patterns in patterns_date.  No error checking is done here; that
is left to the cs1|2 templates.

returns the matched year text (with disambiguator when present); empty string when nothing matched so that the
result can always be concatenated

]]

local function date_get (params)
	for _, alias in ipairs (aliases_date) do									-- date aliases in priority order
		local value = params[alias];
		if value then															-- the template has this date parameter
			for _, pattern in ipairs (patterns_date) do							-- recognized date formats in priority order
				local year = value:match (pattern);								-- captured year portion; nil when this format does not apply
				if year then
					return year;												-- first match wins
				end
			end
		end
	end

	return '';																	-- no date parameter, or none holds a recognized date
end

 
--[[--------------------------< V N A M E S _ G E T >----------------------------------------------------------

Build the name portion of a CITEREF from a Vancouver-style name list (|vauthors= or |veditors=).

params is the table of template parameters; vparam is the name of the parameter to read.  The list is split at
commas and only the first four names are used.  For each name:
	((name)) - accept-as-written markup: the doubled parentheses are removed and the content is kept as is
	Surname AB - the trailing white space and uppercase initials are removed which leaves the surname; the
		pattern is anchored at the end of the name so that multi-word surnames ('Van Der Berg AB') stay intact

returns the surnames concatenated in list order; nil when params[vparam] is not set

]]

local function vnames_get (params, vparam)
	local name_list = params[vparam];											-- value of |vauthors= or |veditors=
	if not name_list then
		return nil;																-- parameter not present in the template
	end

	local surnames = {};														-- always a proper sequence so that # and table.concat() are well defined
	for i, vname in ipairs (mw.text.split (name_list, '%s*,%s*')) do
		if 4 < i then
			break;																-- only the first four names contribute to a CITEREF
		end

		if vname:match ('%(%(.-%)%)') then										-- accept-as-written markup
			surnames[i] = (vname:gsub ('%(%((.-)%)%)', '%1'));					-- keep the content, drop the markup; parentheses discard gsub()'s count
		else
			surnames[i] = (vname:gsub ('(.-)%s+%u+$', '%1'));					-- drop trailing initials only
		end
	end

	return 0 ~= #surnames and table.concat (surnames) or nil					-- return a concatenation of the surnames; nil else
end


--[[--------------------------< N A M E S _ G E T >------------------------------------------------------------

cs1|2 makes CITEREF anchor from contributor, author, or editor name-lists in that order

Get the first four names of one name-list from the cs1|2 template.  params is the table of template parameters;
aliases_list is one of the alias tables (aliases_contributor, aliases_author, aliases_editor) whose entries hold
a '#' placeholder for the enumerator.  For the first name, the enumerated form (|last1=) is tried first and then
the un-enumerated form (|last=) because these are exact aliases.  When more than one alias supplies the same
enumerated name, the alias listed later in aliases_list wins.

returns concatenated names in enumeration order when successful; nil else

missing names (missing |lastn= parameter) are omitted but the other names are included.  To make that true the
names are first gathered by enumerator and then packed into a gap-free sequence; the length operator and
table.concat() are not reliable for tables with holes.

]]

local function names_get (params, aliases_list)
	local by_enum = {};															-- names indexed by enumerator 1-4; may have holes

	for _, alias in ipairs (aliases_list) do
		for enum = 1, 4 do
			local value = params[(alias:gsub ('#', tostring (enum)))];			-- replace '#' to make 'lastn'; parentheses discard gsub()'s count
			if not value and 1 == enum then										-- because |last= and |last1= are exact aliases
				value = params[(alias:gsub ('#', ''))];							-- replace '#' to make 'last'
			end
			if value then
				by_enum[enum] = value;											-- found so save the value
			end
		end
	end

	local names = {};															-- gap-free sequence of the names that were found
	for enum = 1, 4 do
		if by_enum[enum] then
			names[#names + 1] = by_enum[enum];
		end
	end

	return 0 ~= #names and table.concat (names) or nil							-- return a concatenation of the names; nil else
end


--[[--------------------------< T E M P L A T E _ S T R I P >--------------------------------------------------

Templates are not allowed in parameters that become part of the COinS metadata; yet, they will appear.  cs1|2
does not see the template markup; it sees the html that the template rendered and strips that html which leaves
the displayed value for the CITEREF.  That can't necessarily be done here so, because templates aren't allowed
in parameters anyway, any template found inside the cs1|2 template is simply discarded.

Discarding a template may leave a |lastn= parameter empty; it is then treated as if it were really empty, just
as cs1|2 does (three authors, |last2= empty -> CITEREFLast1Last3YYYY; harv and sfn render 'Last1, & Last3 YYYY'
with CITEREFLast1Last3YYYY).

returns the template text without its outer {{ and }} and without any brace-delimited content

]]

local function template_strip (template)
	local inner = template:gsub ('^{{', '');									-- remove the opening delimiter of the cs1|2 template
	inner = inner:gsub ('}}$', '', 1);											-- remove the closing delimiter of the cs1|2 template
	local stripped = inner:gsub ('%b{}', '');									-- remove every remaining balanced {...} run (nested templates)
	return stripped;
end


--[=[-------------------------< W I K I L I N K _ S T R I P >--------------------------------------------------

Wikilink markup does not belong in CITEREF and can / does confuse the code that parses apart the cs1|2 template
so here we remove any wiki markup:
	[[link|label]] -> label text
	[[link]] -> link text

Each wikilink is handled on its own: the text between '[[' and the nearest ']]' is taken and, when it holds a
pipe, only the text after the first pipe is kept.  Working link by link prevents the template's own parameter
pipes from being mistaken for a wikilink pipe; a plain [[link]] followed by '|param=[[other]]' would otherwise
be read as one long piped link and the parameters between the two links would be corrupted.

returns the template text with wikilink markup replaced by the displayed text

]=]

local function wikilink_strip (template)
	local stripped = template:gsub ('%[%[(.-)%]%]', function (content)			-- content is what lies between [[ and the nearest ]]
		return content:match ('^.-|(.*)$') or content;							-- label of a piped link; else the link text itself
	end);
	return stripped;															-- single return value; gsub()'s count is dropped
end


--[[--------------------------< C I T E R E F _ M A K E >------------------------------------------------------

inspect |ref= to decide what to do:
	|ref=harv									- get names and date from template parameters
	|ref=CITEREF...								- take everything after 'CITEREF'
	|ref={{SfnRef|name|name|name|name|year}}	- assemble CITEREF from positional parameters
	|ref={{Harvid|name|name|name|name|year}}	- assemble CITEREF from positional parameters
	|ref=none									- skip; do nothing because CITEREF intentionally suppressed; TODO: keep with a type code of '0'?
	|ref=										- empty (nothing or only white space) or missing
													for cs1: skip
														if |mode=cs2: spoof |ref=harv
													for cs2: get names and date from template parameters
														if |mode=cs1: skip

	|ref=???									- skip; not CITEREF compatible but TODO: the harv and sfn templates support |ref= which can hold anything

template is the complete wikitext of one cs1|2 template including its outer {{ and }}.

returns the CITEREF text without the 'CITEREF' prefix; nil when the template does not provide a usable CITEREF

]]

local function citeref_make (template)
	local params = {};															-- table of cs1|2 parameters
	local citeref;																-- the assembled CITEREF from this template

	local template_name = template:match ('{{%s*([^|]+)'):gsub ('%s*$', ''):lower();	-- lowercase trimmed template name; for cs2 detection

	local ref = template:match ('|%s*ref%s*=%s*(%b{})');						-- first look for |ref={{sfnref}} or |ref={{harvid}}; must be done before templates are stripped
	if not ref then																-- |ref={{template}} not found
		ref = template:match ('|%s*ref%s*=([^|}]+)');							-- plain text value of |ref=; nil when |ref= missing or has no value
		if ref then
			ref = ref:match ('^%s*(.-)%s*$');									-- trim the ends only; white space inside a hand-made CITEREF is significant
		end

		if not ref or '' == ref then											-- here when |ref= missing or empty
			if redirects_citation[template_name] then							-- could be cs2
				if template:match ('|%s*mode%s*=%s*cs1') then
					return nil;													-- citation template but |mode=cs1
				end
			elseif not template:match ('|%s*mode%s*=%s*cs2') then
				return nil;														-- a cs1 template without |mode=cs2
			end
			ref = 'harv';														-- cs2, or cs1 with |mode=cs2: spoof to handle as if |ref=harv
		end
	end

	template = wikilink_strip (template);										-- because piped wikilinks confuse the code that builds params{} and because plain wikilinks are not allowed in CITEREF
	template = template_strip (template);										-- because template markup can confuse the code that builds params{} and because templates in name parameters are not allowed

	for param, value in template:gmatch ('|%s*([^=]-)%s*=%s*([^|}]+)') do		-- build a table of template parameters and their values
		params[param] = mw.text.trim (value);
	end

	if 'harv' == ref then														-- |ref=harv
		citeref = names_get (params, aliases_contributor) or					-- get contributor, author, or editor names
			names_get (params, aliases_author) or
			vnames_get (params, 'vauthors') or									-- |vauthors=
			names_get (params, aliases_editor) or
			vnames_get (params, 'veditors');									-- |veditors=

		if citeref then															-- if names were gotten
			citeref = citeref .. date_get (params);								-- get date portion
		end

	elseif ref:match ('CITEREF(.+)') then										-- for hand-created CITEREFs, take everything but the 'CITEREF' prefix
		citeref = ref:match ('CITEREF(.+)');

	elseif ref:match ('%b{}') then												-- ref holds a template
		citeref = sfnref_get (ref);												-- returns content of {{sfnref}} or {{harvid}}; nil else

	end																			-- |ref=none (explicitly suppressed) and any other text: citeref stays nil

	return citeref;																-- citeref text without 'CITEREF' prefix if found or decoded; nil else
end


--[[--------------------------< H A R V _ L I N K _ T E S T >--------------------------------------------------

Scan the wikitext of the current page for cs1|2 (and vcite) templates and record the CITEREF that each one
provides.

Cite templates wrapped in nowiki tags and everything inside html comments are removed before scanning.  Each
CITEREF is passed through mw.uri.anchorEncode() and stored as a key in the module-level citerefs table with a
value of 1 when seen once, 2 when seen more than once.

returns the citerefs table; empty string when the page has no content (after the removals above)

]]

local function harv_link_test ()
	local content = mw.title.getCurrentTitle():getContent() or '';				-- article wikitext or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625
	content = content:gsub ('<nowiki>%s*{{%s*[Cc]it[ea].-}}%s*</nowiki>', '');	-- remove cite templates inside nowiki tags
	content = content:gsub ('<!%-%-.-%-%->', '');								-- remove html comments and their content

	if '' == content then														-- nothing to scan
		return '';
	end

	local cite_start_pattern = '{{%s*[Vv]?[Cc]it[ae]';							-- opening of a cs1|2 template (or vcite xxx template)
	local first, last = content:find (cite_start_pattern);						-- first is nil when no template is found

	while first do
		local template = content:match ('%b{}', first);							-- the whole brace-balanced template; nil when braces are not balanced
		local anchor = template and citeref_make (template);					-- CITEREF from this template; nil when it has none

		if anchor then
			anchor = mw.uri.anchorEncode (anchor);								-- encode to remove wikimarkup, convert spaces to underscores etc
			citerefs[anchor] = citerefs[anchor] and 2 or 1;						-- 1 on first sighting; 2 marks multiple same name/date citations
		end

		first, last = content:find (cite_start_pattern, last);					-- resume the search at the end of the previous opening
	end

	return citerefs;
end


--[[--------------------------< E X P O R T E D _ T A B L E S >------------------------------------------------

The page scan runs once, when this module is loaded; the result of harv_link_test() is exported as citerefs.

]]

local exported = {
	citerefs = harv_link_test (),												-- table of CITEREFs available in this article
--	harv_link_test = harv_link_test,	-- FOR DEBUG ONLY
	};

return exported;