Module:String2: Difference between revisions
From All Skies Encyclopaedia
| imported>Frietjes  (testing a titlecase function, ... not sure if this is tractable given number of articles, prepositions, etc which are always lower, but starting with a shortened list for testing) | imported>Gonnym   (fix) | ||
| (63 intermediate revisions by 13 users not shown) | |||
| Line 1: | Line 1: | ||
| require ('strict'); | |||
| local p = {} | local p = {} | ||
| p. | p.trim = function(frame) | ||
| 	return mw.text.trim(frame.args[1] or "") | |||
| 	return string.upper( s ) | |||
| end | end | ||
| p. | p.sentence = function (frame) | ||
| 	-- {{lc:}} is strip-marker safe, string.lower is not. | |||
| 	local s = mw.text.trim( frame.args[1] or "" ) | |||
| 	frame.args[1] = frame:callParserFunction('lc', frame.args[1]) | |||
| 	return string.lower( s ) | |||
| 	return p.ucfirst(frame) | |||
| end | end | ||
| p. | p.ucfirst = function (frame) | ||
| 	local s = | 	local s = frame.args[1]; | ||
| 	if not s or '' == s or s:match ('^%s+$') then								-- when <s> is nil, empty, or only whitespace | |||
| 	local strFirst = string.sub( s, 1, 1 ) | |||
| 		return s;																-- abandon because nothing to do | |||
| 	local strRest = string.sub( s, 2 ) | |||
| 	end | |||
| 	return string.upper( strFirst ) .. string.lower( strRest ) | |||
| 	s =  mw.text.trim( frame.args[1] or "" ) | |||
| 	local s1 = "" | |||
| 	local prefix_patterns_t = {													-- sequence of prefix patterns | |||
| 		'^\127[^\127]*UNIQ%-%-%a+%-%x+%-QINU[^\127]*\127',						-- stripmarker | |||
| 		'^([%*;:#]+)',															-- various list markup | |||
| 		'^(\'\'\'*)',															-- bold / italic markup | |||
| 		'^(%b<>)',																-- html-like tags because some templates render these | |||
| 		'^(&%a+;)',																-- html character entities because some templates render these | |||
| 		'^(&#%d+;)',															-- html numeric (decimal) entities because some templates render these | |||
| 		'^(&#x%x+;)',															-- html numeric (hexadecimal) entities because some templates render these | |||
| 		'^(%s+)',																-- any whitespace characters | |||
| 		'^([%(%)%-%+%?%.%%!~!@%$%^&_={}/`,‘’„“”ʻ|\"\'\\]+)',					-- miscellaneous punctuation | |||
| 		} | |||
| 	local prefixes_t = {};														-- list, bold/italic, and html-like markup, & whitespace saved here | |||
| 	local function prefix_strip (s)												-- local function to strip prefixes from <s> | |||
| 		for _, pattern in ipairs (prefix_patterns_t) do							-- spin through <prefix_patterns_t>  | |||
| 			if s:match (pattern) then											-- when there is a match | |||
| 				local prefix = s:match (pattern);								-- get a copy of the matched prefix | |||
| 				table.insert (prefixes_t, prefix);								-- save it | |||
| 				s = s:sub (prefix:len() + 1);									-- remove the prefix from <s> | |||
| 				return s, true;													-- return <s> without prefix and flag; force restart at top of sequence because misc punct removal can break stripmarker | |||
| 			end | |||
| 		end | |||
| 		return s;																-- no prefix found; return <s> with nil flag | |||
| 	end | |||
| 	local prefix_removed;														-- flag; boolean true as long as prefix_strip() finds and removes a prefix | |||
| 	repeat																		-- one by one remove list, bold/italic, html-like markup, whitespace, etc from start of <s> | |||
| 		s, prefix_removed = prefix_strip (s); | |||
| 	until (not prefix_removed);													-- until <prefix_removed> is nil | |||
| 	s1 = table.concat (prefixes_t);												-- recreate the prefix string for later reattachment | |||
| 	local first_text = mw.ustring.match (s, '^%[%[[^%]]+%]%]');					-- extract wikilink at start of string if present; TODO: this can be string.match()? | |||
| 	local upcased; | |||
| 	if first_text then | |||
| 		if first_text:match ('^%[%[[^|]+|[^%]]+%]%]') then						-- if <first_text> is a piped link | |||
| 			upcased = mw.ustring.match (s, '^%[%[[^|]+|%W*(%w)');				-- get first letter character | |||
| 			upcased = mw.ustring.upper (upcased);								-- upcase first letter character | |||
| 			s = mw.ustring.gsub (s, '^(%[%[[^|]+|%W*)%w', '%1' .. upcased);		-- replace | |||
| 		else																	-- here when <first_text> is a wikilink but not a piped link | |||
| 			upcased = mw.ustring.match (s, '^%[%[%W*%w');						-- get '[[' and first letter | |||
| 			upcased = mw.ustring.upper (upcased);								-- upcase first letter character | |||
| 			s = mw.ustring.gsub (s, '^%[%[%W*%w', upcased);						-- replace; no capture needed here | |||
| 		end | |||
| 	elseif s:match ('^%[%S+%s+[^%]]+%]') then									-- if <s> is a ext link of some sort; must have label text | |||
| 		upcased = mw.ustring.match (s, '^%[%S+%s+%W*(%w)');						-- get first letter character | |||
| 		upcased = mw.ustring.upper (upcased);									-- upcase first letter character | |||
| 		s = mw.ustring.gsub (s, '^(%[%S+%s+%W*)%w', '%1' .. upcased);			-- replace | |||
| 	elseif s:match ('^%[%S+%s*%]') then											-- if <s> is a ext link without label text; nothing to do | |||
| 		return s1 .. s;															-- reattach prefix string (if present) and done | |||
| 	else																		-- <s> is not a wikilink or ext link; assume plain text | |||
| 		upcased = mw.ustring.match (s, '^%W*%w');								-- get the first letter character | |||
| 		upcased = mw.ustring.upper (upcased);									-- upcase first letter character | |||
| 		s = mw.ustring.gsub (s, '^%W*%w', upcased);								-- replace; no capture needed here | |||
| 	end | |||
| 	return s1 .. s;																-- reattach prefix string (if present) and done | |||
| end | end | ||
| p.title = function (frame ) | |||
| p.title = function (frame) | |||
| 	local alwayslower = {['a'] = 1, ['an'] = 1, ['the'] = 1,  | |||
| 	-- http://grammar.yourdictionary.com/capitalization/rules-for-capitalization-in-titles.html | |||
| 	-- recommended by The U.S. Government Printing Office Style Manual: | |||
| 	-- "Capitalize all words in titles of publications and documents, | |||
| 	-- except a, an, the, at, by, for, in, of, on, to, up, and, as, but, or, and nor." | |||
| 	local alwayslower = {['a'] = 1, ['an'] = 1, ['the'] = 1, | |||
| 		['and'] = 1, ['but'] = 1, ['or'] = 1, ['for'] = 1, | 		['and'] = 1, ['but'] = 1, ['or'] = 1, ['for'] = 1, | ||
| 		['nor'] = 1, ['on'] = 1, ['at'] = 1, ['to'] = 1, | 		['nor'] = 1, ['on'] = 1, ['in'] = 1, ['at'] = 1, ['to'] = 1, | ||
| 		['from'] = 1, ['by'] = 1 } | 		['from'] = 1, ['by'] = 1, ['of'] = 1, ['up'] = 1 } | ||
| 	local res = '' | 	local res = '' | ||
| 	local s =  mw.text.trim( frame.args[1] or "" ) | 	local s =  mw.text.trim( frame.args[1] or "" ) | ||
| 	local words = mw.text.split( s, " ") | 	local words = mw.text.split( s, " ") | ||
| 	for i, s in ipairs(words) do | 	for i, s in ipairs(words) do | ||
| 		-- {{lc:}} is strip-marker safe, string.lower is not. | |||
| 		s = frame:callParserFunction('lc', s) | |||
| 		if( i > 1 ) then | |||
| 		if i == 1 or alwayslower[s] ~= 1 then | |||
| 				s = mw.getContentLanguage():ucfirst(s) | |||
| 			end | |||
| 		else | |||
| 			s = mw.getContentLanguage():ucfirst(s) | 			s = mw.getContentLanguage():ucfirst(s) | ||
| 		end | 		end | ||
| Line 38: | Line 108: | ||
| 	end | 	end | ||
| 	return table.concat(words, " ") | 	return table.concat(words, " ") | ||
| end | |||
| -- findlast finds the last item in a list | |||
| -- the first unnamed parameter is the list | |||
| -- the second, optional unnamed parameter is the list separator (default = comma space) | |||
| -- returns the whole list if separator not found | |||
| p.findlast = function(frame) | |||
| 	local s =  mw.text.trim( frame.args[1] or "" ) | |||
| 	local sep = frame.args[2] or "" | |||
| 	if sep == "" then sep = ", " end | |||
| 	local pattern = ".*" .. sep .. "(.*)" | |||
| 	local a, b, last = s:find(pattern) | |||
| 	if a then | |||
| 		return last | |||
| 	else | |||
| 		return s | |||
| 	end | |||
| end | |||
| -- stripZeros finds the first number and strips leading zeros (apart from units) | |||
| -- e.g "0940" -> "940"; "Year: 0023" -> "Year: 23"; "00.12" -> "0.12" | |||
| p.stripZeros = function(frame) | |||
| 	local s = mw.text.trim(frame.args[1] or "") | |||
| 	local n = tonumber( string.match( s, "%d+" ) ) or "" | |||
| 	s = string.gsub( s, "%d+", n, 1 ) | |||
| 	return s | |||
| end | |||
| -- nowiki ensures that a string of text is treated by the MediaWiki software as just a string | |||
| -- it takes an unnamed parameter and trims whitespace, then removes any wikicode | |||
| p.nowiki = function(frame) | |||
| 	local str = mw.text.trim(frame.args[1] or "") | |||
| 	return mw.text.nowiki(str) | |||
| end | |||
| -- split splits text at boundaries specified by separator | |||
| -- and returns the chunk for the index idx (starting at 1) | |||
| -- #invoke:String2 |split |text |separator |index |true/false | |||
| -- #invoke:String2 |split |txt=text |sep=separator |idx=index |plain=true/false | |||
| -- if plain is false/no/0 then separator is treated as a Lua pattern - defaults to plain=true | |||
| p.split = function(frame) | |||
| 	local args = frame.args | |||
| 	if not(args[1] or args.txt) then args = frame:getParent().args end | |||
| 	local txt = args[1] or args.txt or "" | |||
| 	if txt == "" then return nil end | |||
| 	local sep = (args[2] or args.sep or ""):gsub('"', '') | |||
| 	local idx = tonumber(args[3] or args.idx) or 1 | |||
| 	local plain = (args[4] or args.plain or "true"):sub(1,1) | |||
| 	plain = (plain ~= "f" and plain ~= "n" and plain ~= "0") | |||
| 	local splittbl = mw.text.split( txt, sep, plain ) | |||
| 	if idx < 0 then idx = #splittbl + idx + 1 end | |||
| 	return splittbl[idx] | |||
| end | |||
| -- val2percent scans through a string, passed as either the first unnamed parameter or |txt= | |||
| -- it converts each number it finds into a percentage and returns the resultant string. | |||
| p.val2percent = function(frame) | |||
| 	local args = frame.args | |||
| 	if not(args[1] or args.txt) then args = frame:getParent().args end | |||
| 	local txt = mw.text.trim(args[1] or args.txt or "") | |||
| 	if txt == "" then return nil end | |||
| 	local function v2p (x) | |||
| 		x = (tonumber(x) or 0) * 100 | |||
| 		if x == math.floor(x) then x = math.floor(x) end | |||
| 		return x .. "%" | |||
| 	end | |||
| 	txt = txt:gsub("%d[%d%.]*", v2p) -- store just the string | |||
| 	return txt | |||
| end | |||
| -- one2a scans through a string, passed as either the first unnamed parameter or |txt= | |||
| -- it converts each occurrence of 'one ' into either 'a ' or 'an ' and returns the resultant string. | |||
| p.one2a = function(frame) | |||
| 	local args = frame.args | |||
| 	if not(args[1] or args.txt) then args = frame:getParent().args end | |||
| 	local txt = mw.text.trim(args[1] or args.txt or "") | |||
| 	if txt == "" then return nil end | |||
| 	txt = txt:gsub(" one ", " a "):gsub("^one", "a"):gsub("One ", "A "):gsub("a ([aeiou])", "an %1"):gsub("A ([aeiou])", "An %1") | |||
| 	return txt | |||
| end | |||
| -- findpagetext returns the position of a piece of text in a page | |||
| -- First positional parameter or |text is the search text | |||
| -- Optional parameter |title is the page title, defaults to current page | |||
| -- Optional parameter |plain is either true for plain search (default) or false for Lua pattern search | |||
| -- Optional parameter |nomatch is the return value when no match is found; default is nil | |||
| p._findpagetext = function(args) | |||
| 	-- process parameters | |||
| 	local nomatch = args.nomatch or "" | |||
| 	if nomatch == "" then nomatch = nil end | |||
| 	-- | |||
| 	local text = mw.text.trim(args[1] or args.text or "") | |||
| 	if text == "" then return nil end | |||
| 	-- | |||
| 	local title = args.title or "" | |||
| 	local titleobj | |||
| 	if title == "" then | |||
| 		titleobj = mw.title.getCurrentTitle() | |||
| 	else | |||
| 		titleobj = mw.title.new(title) | |||
| 	end | |||
| 	-- | |||
| 	local plain = args.plain or "" | |||
| 	if plain:sub(1, 1) == "f" then plain = false else plain = true end | |||
| 	-- get the page content and look for 'text' - return position or nomatch | |||
| 	local content = titleobj and titleobj:getContent() | |||
| 	return content and mw.ustring.find(content, text, 1, plain) or nomatch | |||
| end | |||
| p.findpagetext = function(frame) | |||
| 	local args = frame.args | |||
| 	local pargs = frame:getParent().args | |||
| 	for k, v in pairs(pargs) do | |||
| 		args[k] = v | |||
| 	end | |||
| 	if not (args[1] or args.text) then return nil end | |||
| 	-- just the first value | |||
| 	return (p._findpagetext(args)) | |||
| end | |||
| -- returns the decoded url. Inverse of parser function {{urlencode:val|TYPE}} | |||
| -- Type is: | |||
| -- QUERY decodes + to space (default) | |||
| -- PATH does no extra decoding | |||
| -- WIKI decodes _ to space | |||
| p._urldecode = function(url, type) | |||
| 	url = url or "" | |||
| 	type = (type == "PATH" or type == "WIKI") and type | |||
| 	return mw.uri.decode( url, type ) | |||
| end | |||
| -- {{#invoke:String2|urldecode|url=url|type=type}} | |||
| p.urldecode = function(frame) | |||
| 	return mw.uri.decode( frame.args.url, frame.args.type ) | |||
| end | |||
| -- what follows was merged from Module:StringFunc | |||
| -- helper functions | |||
| p._GetParameters = require('Module:GetParameters') | |||
| -- Argument list helper function, as per Module:String | |||
| p._getParameters = p._GetParameters.getParameters | |||
| -- Escape Pattern helper function so that all characters are treated as plain text, as per Module:String | |||
| function p._escapePattern( pattern_str ) | |||
| 	return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ) | |||
| end | |||
| -- Helper Function to interpret boolean strings, as per Module:String | |||
| p._getBoolean = p._GetParameters.getBoolean | |||
| --[[ | |||
| Strip | |||
| This function Strips characters from string | |||
| Usage: | |||
| {{#invoke:String2|strip|source_string|characters_to_strip|plain_flag}} | |||
| Parameters | |||
| 	source: The string to strip | |||
| 	chars:  The pattern or list of characters to strip from string, replaced with '' | |||
| 	plain:  A flag indicating that the chars should be understood as plain text. defaults to true. | |||
| Leading and trailing whitespace is also automatically stripped from the string. | |||
| ]] | |||
| function p.strip( frame ) | |||
| 	local new_args = p._getParameters( frame.args,  {'source', 'chars', 'plain'} ) | |||
| 	local source_str = new_args['source'] or '' | |||
| 	local chars = new_args['chars'] or '' or 'characters' | |||
| 	source_str = mw.text.trim(source_str) | |||
| 	if source_str == '' or chars == '' then | |||
| 		return source_str | |||
| 	end | |||
| 	local l_plain = p._getBoolean( new_args['plain'] or true ) | |||
| 	if l_plain then | |||
| 		chars = p._escapePattern( chars ) | |||
| 	end | |||
| 	local result | |||
| 	result = mw.ustring.gsub(source_str, "["..chars.."]", '') | |||
| 	return result | |||
| end | |||
| --[[ | |||
| Match any | |||
| Returns the index of the first given pattern to match the input. Patterns must be consecutively numbered. | |||
| Returns the empty string if nothing matches for use in {{#if:}} | |||
| Usage: | |||
| 	{{#invoke:String2|matchAll|source=123 abc|456|abc}} returns '2'. | |||
| Parameters: | |||
| 	source: the string to search | |||
| 	plain:  A flag indicating that the patterns should be understood as plain text. defaults to true. | |||
| 	1, 2, 3, ...: the patterns to search for | |||
| ]] | |||
| function p.matchAny(frame) | |||
| 	local source_str = frame.args['source'] or error('The source parameter is mandatory.') | |||
| 	local l_plain = p._getBoolean( frame.args['plain'] or true ) | |||
| 	for i = 1, math.huge do | |||
| 		local pattern = frame.args[i] | |||
| 		if not pattern then return '' end | |||
| 		if mw.ustring.find(source_str, pattern, 1, l_plain) then | |||
| 			return tostring(i) | |||
| 		end | |||
| 	end | |||
| end | |||
| --[[--------------------------< H Y P H E N _ T O _ D A S H >-------------------------------------------------- | |||
| Converts a hyphen to a dash under certain conditions.  The hyphen must separate | |||
| like items; unlike items are returned unmodified.  These forms are modified: | |||
| 	letter - letter (A - B) | |||
| 	digit - digit (4-5) | |||
| 	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5) | |||
| 	letterdigit - letterdigit (A1-A5) (an optional separator between letter and | |||
| 		digit is supported – a.1-a.5 or a-1-a-5) | |||
| 	digitletter - digitletter (5a - 5d) (an optional separator between letter and | |||
| 		digit is supported – 5.a-5.d or 5-a-5-d) | |||
| any other forms are returned unmodified. | |||
| str may be a comma- or semicolon-separated list | |||
| ]] | |||
| function p.hyphen_to_dash( str, spacing ) | |||
| 	if (str == nil or str == '') then | |||
| 		return str | |||
| 	end | |||
| 	local accept | |||
| 	str = mw.text.decode(str, true )											-- replace html entities with their characters; semicolon mucks up the text.split | |||
| 	local out = {} | |||
| 	local list = mw.text.split (str, '%s*[,;]%s*')								-- split str at comma or semicolon separators if there are any | |||
| 	for _, item in ipairs (list) do												-- for each item in the list | |||
| 		item = mw.text.trim(item)												-- trim whitespace | |||
| 		item, accept = item:gsub ('^%(%((.+)%)%)$', '%1') | |||
| 		if accept == 0 and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators | |||
| 			if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or			-- letterdigit hyphen letterdigit (optional separator between letter and digit) | |||
| 				item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or			-- digitletter hyphen digitletter (optional separator between digit and letter) | |||
| 				item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or			-- digit separator digit hyphen digit separator digit | |||
| 				item:match ('^%d+%s*%-%s*%d+$') or								-- digit hyphen digit | |||
| 				item:match ('^%a+%s*%-%s*%a+$') then							-- letter hyphen letter | |||
| 					item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2')	-- replace hyphen, remove extraneous space characters | |||
| 			else | |||
| 				item = mw.ustring.gsub (item, '%s*[–—]%s*', '–')				-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace | |||
| 			end | |||
| 		end | |||
| 		table.insert (out, item)												-- add the (possibly modified) item to the output table | |||
| 	end | |||
| 	local temp_str = table.concat (out, ',' .. spacing)							-- concatenate the output table into a comma separated string | |||
| 	temp_str, accept = temp_str:gsub ('^%(%((.+)%)%)$', '%1')					-- remove accept-this-as-written markup when it wraps all of concatenated out | |||
| 	if accept ~= 0 then | |||
| 		temp_str = str:gsub ('^%(%((.+)%)%)$', '%1')							-- when global markup removed, return original str; do it this way to suppress boolean second return value | |||
| 	end | |||
| 	return temp_str | |||
| end | |||
| function p.hyphen2dash( frame ) | |||
| 	local str = frame.args[1] or '' | |||
| 	local spacing = frame.args[2] or ' ' -- space is part of the standard separator for normal spacing (but in conjunction with templates r/rp/ran we may need a narrower spacing | |||
| 	return p.hyphen_to_dash(str, spacing) | |||
| end | |||
| -- Similar to [[Module:String#endswith]] | |||
| function p.startswith(frame) | |||
| 	return (frame.args[1]:sub(1, frame.args[2]:len()) == frame.args[2]) and 'yes' or '' | |||
| end | |||
| -- Implements [[Template:Isnumeric]] | |||
| function p.isnumeric(frame) | |||
| 	local s = frame.args[1] or frame:getParent().args[1] | |||
| 	local boolean = (frame.args.boolean or frame:getParent().args.boolean) == 'true' | |||
| 	if type(s) == 'string' and mw.getContentLanguage():parseFormattedNumber( s ) then | |||
| 		return boolean and 1 or s | |||
| 	end | |||
| 	return boolean and 0 or '' | |||
| end | |||
| -- Checks if a value in a group of numbers is not an integer. | |||
| -- Allows usage of an |empty= parameter to allow empty values to be skipped. | |||
| function p.isInteger(frame) | |||
| 	local values = frame.args or frame:getParent().args | |||
| 	local allow_empty = frame.args.empty or frame:getParent().args.empty | |||
| 	for _, value in ipairs(values) do | |||
| 		-- Trim spaces | |||
| 		value = value and value:gsub("^%s*(.-)%s*$", "%1") | |||
| 		if value == "" or value == nil then | |||
| 			if not allow_empty then | |||
| 				return false  -- Empty values are not allowed | |||
| 			end | |||
| 		else | |||
| 			value = tonumber(value) | |||
| 			if not (type(value) == "number" and value == math.floor(value)) then | |||
| 				return false | |||
| 			end | |||
| 		end | |||
| 	end | |||
| 	return true | |||
| end | |||
| -- Returns an error found in a string. | |||
| function p.getError(frame) | |||
| 	local text = frame.args[1] or frame:getParent().args[1] | |||
| 	local error_message = text:match('(<strong class="error">.-</strong>)') | |||
| 	return error_message or nil | |||
| end | end | ||
Latest revision as of 12:24, 19 February 2025
Documentation for this module may be created at Module:String2/doc
require ('strict');
local p = {}
-- trim returns the first unnamed parameter with leading and trailing whitespace removed
-- a missing parameter is treated as the empty string
p.trim = function(frame)
	local raw = frame.args[1]
	if not raw then
		raw = ""
	end
	return mw.text.trim(raw)
end
-- sentence lower-cases the first unnamed parameter and then hands the frame to p.ucfirst()
-- the lower-cased text is written back into frame.args[1] so that p.ucfirst() reads it from there
p.sentence = function (frame)
	local args = frame.args
	-- lower-casing goes through the {{lc:}} parser function because it is strip-marker safe; string.lower is not
	args[1] = frame:callParserFunction('lc', args[1])
	return p.ucfirst(frame)
end
--[[
ucfirst upper-cases the first alphanumeric character of the first unnamed parameter.

Stripmarkers, list markup, bold/italic markup, html-like tags, html entities, whitespace,
and miscellaneous punctuation at the start of the text are set aside before the first
character is looked for, and are reattached unchanged in front of the result.  When what
remains begins with a wikilink or a labelled external link, the first alphanumeric
character after the pipe / after the url is upper-cased instead of the link target.

Returns the parameter as given when it is nil, empty, or only whitespace.  When there is
no alphanumeric character to upper-case, the trimmed text is returned unchanged.
]]
p.ucfirst = function (frame)
	local s = frame.args[1];
	if not s or '' == s or s:match ('^%s+$') then								-- when <s> is nil, empty, or only whitespace
		return s;																-- abandon because nothing to do
	end

	s = mw.text.trim (s);

	local prefix_patterns_t = {													-- sequence of prefix patterns
		'^\127[^\127]*UNIQ%-%-%a+%-%x+%-QINU[^\127]*\127',						-- stripmarker
		'^([%*;:#]+)',															-- various list markup
		'^(\'\'\'*)',															-- bold / italic markup
		'^(%b<>)',																-- html-like tags because some templates render these
		'^(&%a+;)',																-- html character entities because some templates render these
		'^(&#%d+;)',															-- html numeric (decimal) entities because some templates render these
		'^(&#x%x+;)',															-- html numeric (hexadecimal) entities because some templates render these
		'^(%s+)',																-- any whitespace characters
		'^([%(%)%-%+%?%.%%!~!@%$%^&_={}/`,‘’„“”ʻ|\"\'\\]+)',					-- miscellaneous punctuation
		}

	local prefixes_t = {};														-- list, bold/italic, and html-like markup, & whitespace saved here

	local function prefix_strip (text)											-- local function to strip one prefix from <text>
		for _, pattern in ipairs (prefix_patterns_t) do							-- spin through <prefix_patterns_t>
			local prefix = text:match (pattern);								-- match once; every pattern yields a non-empty string or nil
			if prefix then
				table.insert (prefixes_t, prefix);								-- save it
				return text:sub (prefix:len() + 1), true;						-- return <text> without prefix and flag; force restart at top of sequence because misc punct removal can break stripmarker
			end
		end
		return text;															-- no prefix found; return <text> with nil flag
	end

	local function literal (text)												-- wrap <text> so that gsub() inserts it as-is;
		return function () return text end										-- a replacement string would have any '%' in it treated as an escape
	end

	local prefix_removed;														-- flag; boolean true as long as prefix_strip() finds and removes a prefix

	repeat																		-- one by one remove list, bold/italic, html-like markup, whitespace, etc from start of <s>
		s, prefix_removed = prefix_strip (s);
	until (not prefix_removed);													-- until <prefix_removed> is nil

	local s1 = table.concat (prefixes_t);										-- recreate the prefix string for later reattachment

	local first_text = mw.ustring.match (s, '^%[%[[^%]]+%]%]');					-- extract wikilink at start of string if present; TODO: this can be string.match()?

	local upcased;
	if first_text then
		if first_text:match ('^%[%[[^|]+|[^%]]+%]%]') then						-- if <first_text> is a piped link
			upcased = mw.ustring.match (s, '^%[%[[^|]+|%W*(%w)');				-- get first letter character
			if upcased then														-- nil when there is nothing alphanumeric to upcase
				upcased = mw.ustring.upper (upcased);							-- upcase first letter character
				s = mw.ustring.gsub (s, '^(%[%[[^|]+|%W*)%w', '%1' .. upcased);	-- replace; <upcased> is a single alphanumeric so safe in a replacement string
			end
		else																	-- here when <first_text> is a wikilink but not a piped link
			upcased = mw.ustring.match (s, '^%[%[%W*%w');						-- get '[[' and first letter
			if upcased then
				upcased = mw.ustring.upper (upcased);							-- upcase first letter character
				s = mw.ustring.gsub (s, '^%[%[%W*%w', literal (upcased));		-- replace; no capture needed here
			end
		end

	elseif s:match ('^%[%S+%s+[^%]]+%]') then									-- if <s> is a ext link of some sort; must have label text
		upcased = mw.ustring.match (s, '^%[%S+%s+%W*(%w)');						-- get first letter character
		if upcased then
			upcased = mw.ustring.upper (upcased);								-- upcase first letter character
			s = mw.ustring.gsub (s, '^(%[%S+%s+%W*)%w', '%1' .. upcased);		-- replace
		end

	elseif s:match ('^%[%S+%s*%]') then											-- if <s> is a ext link without label text; nothing to do
		return s1 .. s;															-- reattach prefix string (if present) and done

	else																		-- <s> is not a wikilink or ext link; assume plain text
		upcased = mw.ustring.match (s, '^%W*%w');								-- get the first letter character and whatever non-alphanumerics precede it
		if upcased then															-- nil when <s> is empty or has no alphanumeric character (input was all markup / punctuation)
			upcased = mw.ustring.upper (upcased);								-- upcase first letter character
			s = mw.ustring.gsub (s, '^%W*%w', literal (upcased));				-- replace; no capture needed here
		end
	end

	return s1 .. s;																-- reattach prefix string (if present) and done
end
-- title returns the first unnamed parameter in title case
-- the text is trimmed and split at single spaces; every word is lower-cased, then its first
-- letter is upper-cased unless the word is in <alwayslower> (the first word is always capitalised)
p.title = function (frame)
	-- http://grammar.yourdictionary.com/capitalization/rules-for-capitalization-in-titles.html
	-- recommended by The U.S. Government Printing Office Style Manual:
	-- "Capitalize all words in titles of publications and documents,
	-- except a, an, the, at, by, for, in, of, on, to, up, and, as, but, or, and nor."
	-- NOTE(review): the table below omits "as" and adds "from" relative to the quoted rule - confirm whether that is intended
	local alwayslower = {['a'] = 1, ['an'] = 1, ['the'] = 1,
		['and'] = 1, ['but'] = 1, ['or'] = 1, ['for'] = 1,
		['nor'] = 1, ['on'] = 1, ['in'] = 1, ['at'] = 1, ['to'] = 1,
		['from'] = 1, ['by'] = 1, ['of'] = 1, ['up'] = 1 }
	local lang = mw.getContentLanguage()										-- fetched once; it does not change between words
	local words = mw.text.split( mw.text.trim( frame.args[1] or "" ), " ")
	for i, word in ipairs(words) do
		-- {{lc:}} is strip-marker safe, string.lower is not.
		word = frame:callParserFunction('lc', word)
		if i == 1 or alwayslower[word] ~= 1 then
			word = lang:ucfirst(word)
		end
		words[i] = word
	end
	return table.concat(words, " ")
end
-- findlast returns the last item of a list
-- first unnamed parameter: the list (trimmed before use)
-- second unnamed parameter (optional): the list separator; a missing or empty value means comma space
-- the separator is spliced into a Lua pattern as-is, so pattern magic characters in it keep their special meaning
-- when the separator does not occur, the whole (trimmed) list is returned
p.findlast = function(frame)
	local list = mw.text.trim( frame.args[1] or "" )
	local sep = frame.args[2]
	if not sep or sep == "" then
		sep = ", "
	end
	-- the greedy ".*" pushes the separator match as far to the right as possible,
	-- so the capture is whatever follows the final separator
	local last = list:match( ".*" .. sep .. "(.*)" )
	if last == nil then
		return list
	end
	return last
end
-- stripZeros finds the first number and strips leading zeros (apart from units)
-- e.g "0940" -> "940"; "Year: 0023" -> "Year: 23"; "00.12" -> "0.12"
-- only the first run of digits in the (trimmed) first unnamed parameter is touched;
-- text without digits is returned unchanged
p.stripZeros = function(frame)
	local s = mw.text.trim(frame.args[1] or "")
	-- work on the digits as text: converting through tonumber() would rewrite long digit runs
	-- in scientific notation (e.g. "100000000000000" -> "1e+14") and lose precision.
	-- "0*" gives back one zero when the run is all zeros because "%d+" needs at least one digit.
	return (s:gsub( "0*(%d+)", "%1", 1 ))										-- parentheses drop gsub's replacement count
end
-- nowiki makes the MediaWiki software treat a piece of text as plain text
-- the first unnamed parameter (empty string when missing) is trimmed and then passed through mw.text.nowiki()
p.nowiki = function(frame)
	return mw.text.nowiki( mw.text.trim( frame.args[1] or "" ) )
end
-- split cuts text at each occurrence of a separator and returns one of the resulting chunks
-- #invoke:String2 |split |text |separator |index |true/false
-- #invoke:String2 |split |txt=text |sep=separator |idx=index |plain=true/false
-- index counts from 1 (default 1); a negative index counts back from the last chunk
-- double quotes are removed from the separator before it is used
-- plain defaults to true; a value beginning with f, n, or 0 makes the separator a Lua pattern
-- the parent frame's arguments are used when the #invoke itself supplies no text
-- returns nil when the text is empty or the index is out of range
p.split = function(frame)
	local args = frame.args
	if not (args[1] or args.txt) then
		args = frame:getParent().args
	end

	local txt = args[1] or args.txt or ""
	if txt == "" then
		return nil
	end

	local sep = args[2] or args.sep or ""
	sep = sep:gsub('"', '')														-- single assignment target keeps only the string result

	local idx = tonumber(args[3] or args.idx) or 1

	local flag = (args[4] or args.plain or "true"):sub(1, 1)					-- only the first character decides
	local plain = not (flag == "f" or flag == "n" or flag == "0")

	local chunks = mw.text.split( txt, sep, plain )
	if idx < 0 then
		idx = #chunks + idx + 1													-- -1 is the last chunk
	end
	return chunks[idx]
end
-- val2percent scans through a string, passed as either the first unnamed parameter or |txt=
-- it converts each number it finds into a percentage and returns the resultant string.
-- the parent frame's arguments are used when the #invoke itself supplies no text
-- returns nil when the (trimmed) text is empty
-- a run of digits and dots that is not a valid number (e.g. "1.2.3") becomes "0%"
p.val2percent = function(frame)
	local args = frame.args
	if not(args[1] or args.txt) then args = frame:getParent().args end
	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then return nil end
	local function v2p (x)
		-- the scan pattern also collects dots that follow the number, such as the full stop in
		-- "rose by 0.5."; split them off so the number still converts and the punctuation survives
		local number_str, trailing_dots = x:match("^(.-)(%.*)$")
		local pct = (tonumber(number_str) or 0) * 100
		if pct == math.floor(pct) then pct = math.floor(pct) end
		return pct .. "%" .. trailing_dots
	end
	return (txt:gsub("%d[%d%.]*", v2p))											-- parentheses keep just the string
end
-- one2a scans through a string, passed as either the first unnamed parameter or |txt=
-- it converts each occurrence of 'one ' into either 'a ' or 'an ' and returns the resultant string.
-- 'an' is chosen when the replaced word is followed by a space and a lower-case vowel (a, e, i, o, u)
-- matched forms: ' one ' anywhere, 'one' at the very start of the text, and 'One ' anywhere
-- NOTE: the start-of-text form has no trailing space in its pattern, so it also fires on words that merely begin with 'one'
-- the parent frame's arguments are used when the #invoke itself supplies no text
-- returns nil when the (trimmed) text is empty
p.one2a = function(frame)
	local args = frame.args
	if not(args[1] or args.txt) then args = frame:getParent().args end
	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then return nil end

	-- Replace every match of <pattern> in <text> with <before> .. article .. <after>.
	-- <pattern> holds one position capture "()" placed directly after "one"/"One"; the text at that
	-- position decides between a and an.  Deciding here, per replacement, keeps the a -> an change
	-- away from the rest of the text (a text-wide "a <vowel>" substitution turns "pizza is" into "pizzan is").
	local function swap (text, pattern, before, article, after)
		return (text:gsub(pattern, function (pos)
			local n = text:find("^ [aeiou]", pos) and "n" or ""
			return before .. article .. n .. after
		end))
	end

	txt = swap(txt, " one() ", " ", "a", " ")
	txt = swap(txt, "^one()", "", "a", "")
	txt = swap(txt, "One() ", "", "A", " ")
	return txt
end
-- findpagetext reports where a piece of text first occurs in a page's content
-- First positional parameter or |text is the search text (trimmed); when it is empty the result is nil, not |nomatch
-- Optional parameter |title is the page title, defaults to current page
-- Optional parameter |plain selects a plain search unless its value begins with "f", in which case the search text is a Lua pattern
-- Optional parameter |nomatch is the return value when no match is found or the page content is unavailable; default is nil
p._findpagetext = function(args)
	-- an absent or empty |nomatch means "return nil"
	local nomatch = args.nomatch
	if not nomatch or nomatch == "" then
		nomatch = nil
	end

	local text = mw.text.trim(args[1] or args.text or "")
	if text == "" then
		return nil
	end

	-- resolve the page to search
	local title = args.title or ""
	local titleobj
	if title ~= "" then
		titleobj = mw.title.new(title)
	else
		titleobj = mw.title.getCurrentTitle()
	end

	local plain = (args.plain or ""):sub(1, 1) ~= "f"

	-- fetch the content and search it; only the start position of the match is returned
	local content = titleobj and titleobj:getContent()
	if content then
		local position = mw.ustring.find(content, text, 1, plain)
		if position then
			return position
		end
	end
	return nomatch
end
-- template-facing entry point for p._findpagetext()
-- arguments of the parent frame are copied over those of the #invoke, so the parent's value wins when both set the same key
-- returns nil when neither the first positional parameter nor |text is supplied
p.findpagetext = function(frame)
	local merged = frame.args
	for key, value in pairs(frame:getParent().args) do
		merged[key] = value
	end
	if merged[1] or merged.text then
		-- going through a local hands back a single value
		local position = p._findpagetext(merged)
		return position
	end
	return nil
end
-- decodes a percent-encoded url; the inverse of parser function {{urlencode:val|TYPE}}
-- <type> selects the decoding:
--   QUERY decodes + to space (default)
--   PATH does no extra decoding
--   WIKI decodes _ to space
-- anything other than exactly "PATH" or "WIKI" is passed on as false; a missing <url> is treated as the empty string
-- (the parameter named <type> hides Lua's built-in type() inside this function)
p._urldecode = function(url, type)
	if not url then
		url = ""
	end
	if type ~= "PATH" and type ~= "WIKI" then
		type = false
	end
	return mw.uri.decode( url, type )
end
-- {{#invoke:String2|urldecode|url=url|type=type}}
-- template-facing wrapper for p._urldecode()
-- |url=  the text to decode; a missing value decodes to the empty string instead of raising a script error
-- |type= QUERY (default), PATH, or WIKI in any letter case; an empty or unrecognised value falls back to the default
p.urldecode = function(frame)
	local enctype = frame.args.type
	if enctype then
		enctype = enctype:upper()												-- p._urldecode() compares against upper-case names only
	end
	return p._urldecode( frame.args.url, enctype )
end
-- what follows was merged from Module:StringFunc
-- helper functions
-- Module:GetParameters is loaded here and kept on the export table; p._getParameters and p._getBoolean are aliases into it
p._GetParameters = require('Module:GetParameters')
-- Argument list helper function, as per Module:String
-- p.strip() calls it with frame.args and a list of parameter names, then reads the result by those names
-- NOTE(review): how positional and named arguments are reconciled is defined in Module:GetParameters - confirm there
p._getParameters = p._GetParameters.getParameters
-- Escape Pattern helper function so that all characters are treated as plain text, as per Module:String
-- every Lua pattern magic character in <pattern_str> is prefixed with '%'
-- returns only the escaped string; the parentheses drop gsub's replacement count so that it
-- cannot leak into a caller's argument list or table constructor
function p._escapePattern( pattern_str )
	return ( mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ) )
end
-- Helper Function to interpret boolean strings, as per Module:String
-- Taken from Module:GetParameters; p.strip and p.matchAny call it with either
-- the |plain= parameter value or the literal true when that parameter is absent.
p._getBoolean = p._GetParameters.getBoolean
--[[
Strip
This function strips characters from a string
Usage:
{{#invoke:String2|strip|source_string|characters_to_strip|plain_flag}}
Parameters
	source: The string to strip
	chars:  The pattern or list of characters to strip from string, replaced with ''
	plain:  A flag indicating that the chars should be understood as plain text. defaults to true.
Leading and trailing whitespace is trimmed from the source string before the
characters are stripped.
If the trimmed source or chars is empty, the trimmed source is returned unchanged.
]]
function p.strip( frame )
	local new_args = p._getParameters( frame.args,  {'source', 'chars', 'plain'} )
	local source_str = mw.text.trim( new_args['source'] or '' )
	local chars = new_args['chars'] or ''
	if source_str == '' or chars == '' then
		return source_str
	end
	-- an absent plain flag defaults to true
	local l_plain = p._getBoolean( new_args['plain'] or true )
	if l_plain then
		-- neutralise pattern magic characters so chars is a literal character list
		chars = p._escapePattern( chars )
	end
	-- chars becomes the body of a character set; every match is removed.
	-- Assigning to a local keeps only the string and drops gsub's match count.
	local result = mw.ustring.gsub( source_str, "[" .. chars .. "]", '' )
	return result
end
--[[
Match any
Returns the index of the first given pattern to match the input. Patterns must be consecutively numbered.
Returns the empty string if nothing matches for use in {{#if:}}
Usage:
	{{#invoke:String2|matchAny|source=123 abc|456|abc}} returns '2'.
Parameters:
	source: the string to search
	plain:  A flag indicating that the patterns should be understood as plain text. defaults to true.
	1, 2, 3, ...: the patterns to search for
]]
function p.matchAny(frame)
	-- Walks the consecutively numbered parameters and reports, as a string,
	-- the index of the first one found in |source=; '' when none is found.
	local args = frame.args
	local haystack = args['source'] or error('The source parameter is mandatory.')
	-- an absent plain flag defaults to true
	local plain_flag = p._getBoolean( args['plain'] or true )
	local index = 1
	local needle = args[index]
	while needle do
		if mw.ustring.find(haystack, needle, 1, plain_flag) then
			return tostring(index)
		end
		index = index + 1
		needle = args[index]
	end
	-- ran past the last numbered parameter without a match
	return ''
end
--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------
Converts a hyphen to a dash under certain conditions.  The hyphen must separate
like items; unlike items are returned unmodified.  These forms are modified:
	letter - letter (A - B)
	digit - digit (4-5)
	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
	letterdigit - letterdigit (A1-A5) (an optional separator between letter and
		digit is supported – a.1-a.5 or a-1-a-5)
	digitletter - digitletter (5a - 5d) (an optional separator between letter and
		digit is supported – 5.a-5.d or 5-a-5-d)
any other forms are returned unmodified.
str may be a comma- or semicolon-separated list
Parameters:
	str:     the text to convert; nil or the empty string is returned as is
	spacing: the text placed after each comma when the list items are joined
		again; defaults to a single space when omitted
Items, or the whole of str, wrapped in ((...)) accept-this-as-written markup are
not converted; the markup itself is removed.
]]
function p.hyphen_to_dash( str, spacing )
	if (str == nil or str == '') then
		return str
	end
	spacing = spacing or ' '													-- same default as p.hyphen2dash so that Lua callers may omit spacing
	local accept
	str = mw.text.decode(str, true )											-- replace html entities with their characters; semicolon mucks up the text.split
	local out = {}
	local list = mw.text.split (str, '%s*[,;]%s*')								-- split str at comma or semicolon separators if there are any
	for _, item in ipairs (list) do												-- for each item in the list
		item = mw.text.trim(item)												-- trim whitespace
		item, accept = item:gsub ('^%(%((.+)%)%)$', '%1')						-- remove accept-this-as-written markup; accept is non-zero when it was present
		if accept == 0 and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators
			if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or			-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or			-- digitletter hyphen digitletter (optional separator between digit and letter)
				item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or			-- digit separator digit hyphen digit separator digit
				item:match ('^%d+%s*%-%s*%d+$') or								-- digit hyphen digit
				item:match ('^%a+%s*%-%s*%a+$') then							-- letter hyphen letter
					item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2')	-- replace hyphen, remove extraneous space characters
			else
				item = mw.ustring.gsub (item, '%s*[–—]%s*', '–')				-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
			end
		end
		table.insert (out, item)												-- add the (possibly modified) item to the output table
	end
	local temp_str = table.concat (out, ',' .. spacing)							-- concatenate the output table into a comma separated string
	temp_str, accept = temp_str:gsub ('^%(%((.+)%)%)$', '%1')					-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept ~= 0 then
		temp_str = str:gsub ('^%(%((.+)%)%)$', '%1')							-- when global markup removed, return original str; do it this way to suppress boolean second return value
	end
	return temp_str
end
function p.hyphen2dash( frame )
	-- #invoke wrapper around p.hyphen_to_dash:
	--   parameter 1 is the text to convert (empty string when absent)
	--   parameter 2 is the spacing that follows each comma in the output; it
	--   defaults to a single space, the normal separator spacing (a narrower
	--   spacing may be needed in conjunction with templates r/rp/ran)
	return p.hyphen_to_dash(frame.args[1] or '', frame.args[2] or ' ')
end
-- Similar to [[Module:String#endswith]]
-- Returns 'yes' when parameter 1 begins with parameter 2, otherwise ''.
-- A missing parameter is treated as the empty string instead of raising an
-- error; as with an explicitly empty parameter 2, an empty prefix always matches.
function p.startswith(frame)
	local subject = frame.args[1] or ''
	local prefix = frame.args[2] or ''
	-- compare the leading bytes of subject, as many as prefix has, with prefix
	return (subject:sub(1, prefix:len()) == prefix) and 'yes' or ''
end
-- Implements [[Template:Isnumeric]]
-- Parameter 1 is the value to test and |boolean= selects the output form; each
-- is read from the frame first and from the parent frame when the frame lacks it.
-- When the content language's parseFormattedNumber accepts the value, the
-- result is 1 (with |boolean=true) or the value itself; otherwise it is
-- 0 (with |boolean=true) or ''.
function p.isnumeric(frame)
	-- the parent frame is only consulted when the frame does not supply the key
	local function lookup(key)
		return frame.args[key] or frame:getParent().args[key]
	end
	local candidate = lookup(1)
	local as_flag = lookup('boolean') == 'true'
	local parsed = type(candidate) == 'string'
		and mw.getContentLanguage():parseFormattedNumber( candidate )
	if parsed then
		if as_flag then
			return 1
		end
		return candidate
	end
	if as_flag then
		return 0
	end
	return ''
end
-- Checks whether every value in a group of positional parameters is an integer.
-- Returns true when all of them are, false as soon as one is not.
-- Allows usage of an |empty= parameter to allow empty values to be skipped;
-- without it an empty value makes the result false.
-- The values are taken from the frame's own positional parameters, or from
-- the parent frame's when the frame has none.
function p.isInteger(frame)
	local parent = frame:getParent()
	local parent_args = parent and parent.args or {}
	-- frame.args is always a table, so test for an actual first value before
	-- deciding whether to fall back to the parent frame
	local values = frame.args
	if values[1] == nil then
		values = parent_args
	end
	local allow_empty = frame.args.empty or parent_args.empty
	for _, value in ipairs(values) do
		-- Trim spaces; the parentheses drop gsub's match count
		local trimmed = (value:gsub("^%s*(.-)%s*$", "%1"))
		if trimmed == "" then
			if not allow_empty then
				return false  -- Empty values are not allowed
			end
		else
			local number = tonumber(trimmed)
			-- not a number at all, or a number with a fractional part
			if not number or number ~= math.floor(number) then
				return false
			end
		end
	end
	return true
end
-- Returns an error found in a string.
-- The text is parameter 1 of the frame, or of the parent frame when the frame
-- has none. The result is the first <strong class="error">...</strong> element
-- in the text, or nil when there is no such element or no text was supplied.
function p.getError(frame)
	local text = frame.args[1] or frame:getParent().args[1]
	if not text then
		return nil																-- nothing to search; avoids indexing a nil value
	end
	-- '.-' is the shortest match, so only the first error element is captured
	return (text:match('(<strong class="error">.-</strong>)'))
end
return p







