Module:String2: Difference between revisions

From All Skies Encyclopaedia
imported>Elli
(update from sandbox - merge in strip function from module:stringfunc)
imported>Ahecht
(add p.isnumeric, which is a more WP:PEIS-efficient implementation of Template:Isnumeric)
 
(20 intermediate revisions by 8 users not shown)
Line 1: Line 1:
require ('strict');
local p = {}
local p = {}


p.trim = function(frame)
p.trim = function(frame)
local s = mw.text.trim(frame.args[1])
return mw.text.trim(frame.args[1] or "")
return s
end
end


p.upper = function(frame)
p.sentence = function (frame)
-- {{lc:}} is strip-marker safe, string.lower is not.
local s = mw.text.trim(frame.args[1] or "")
frame.args[1] = frame:callParserFunction('lc', frame.args[1])
return string.upper(s)
end

p.lower = function(frame)
local s = mw.text.trim(frame.args[1] or "")
return string.lower(s)
end

p.sentence = function (frame )
frame.args[1] = string.lower(frame.args[1])
return p.ucfirst(frame)
return p.ucfirst(frame)
end
end


p.ucfirst = function (frame )
p.ucfirst = function (frame )
local s = mw.text.trim( frame.args[1] or "" )
local s = frame.args[1];
if not s or '' == s or s:match ('^%s+$') then -- when <s> is nil, empty, or only whitespace
return s; -- abandon because nothing to do
end

s = mw.text.trim( frame.args[1] or "" )
local s1 = ""
local s1 = ""

-- if it's a list chop off and (store as s1) everything up to the first <li>
local prefix_patterns_t = { -- sequence of prefix patterns
local lipos = mw.ustring.find(s, "<li>" )
'^\127[^\127]*UNIQ%-%-%a+%-%x+%-QINU[^\127]*\127', -- stripmarker
if lipos then
'^([%*;:#]+)', -- various list markup
s1 = mw.ustring.sub(s, 1, lipos + 3)
'^(\'\'\'*)', -- bold / italic markup
s = mw.ustring.sub(s, lipos + 4)
'^(%b<>)', -- html-like tags because some templates render these
'^(&%a+;)', -- html character entities because some templates render these
'^(&#%d+;)', -- html numeric (decimal) entities because some templates render these
'^(&#x%x+;)', -- html numeric (hexadecimal) entities because some templates render these
'^(%s+)', -- any whitespace characters
'^([%(%)%-%+%?%.%%!~!@%$%^&_={}/`,‘’„“”ʻ|\"\'\\]+)', -- miscellaneous punctuation
}
local prefixes_t = {}; -- list, bold/italic, and html-like markup, & whitespace saved here

local function prefix_strip (s) -- local function to strip prefixes from <s>
for _, pattern in ipairs (prefix_patterns_t) do -- spin through <prefix_patterns_t>
if s:match (pattern) then -- when there is a match
local prefix = s:match (pattern); -- get a copy of the matched prefix
table.insert (prefixes_t, prefix); -- save it
s = s:sub (prefix:len() + 1); -- remove the prefix from <s>
return s, true; -- return <s> without prefix and flag; force restart at top of sequence because misc punct removal can break stripmarker
end
end
return s; -- no prefix found; return <s> with nil flag
end
end

-- s1 is either "" or the first part of the list markup, so we can continue
local prefix_removed; -- flag; boolean true as long as prefix_strip() finds and removes a prefix
-- and prepend s1 to the returned string
local letterpos
repeat -- one by one remove list, bold/italic, html-like markup, whitespace, etc from start of <s>
if mw.ustring.find(s, "^%[%[[^|]+|[^%]]+%]%]") then
s, prefix_removed = prefix_strip (s);
-- this is a piped wikilink, so we capitalise the text, not the pipe
until (not prefix_removed); -- until <prefix_removed> is nil
local _

_, letterpos = mw.ustring.find(s, "|%A*%a") -- find the first letter after the pipe
s1 = table.concat (prefixes_t); -- recreate the prefix string for later reattachment
else

letterpos = mw.ustring.find(s, '%a')
local first_text = mw.ustring.match (s, '^%[%[[^%]]+%]%]'); -- extract wikilink at start of string if present; TODO: this can be string.match()?
end

if letterpos then
local upcased;
local first = mw.ustring.sub(s, 1, letterpos - 1)
if first_text then
local letter = mw.ustring.sub(s, letterpos, letterpos)
if first_text:match ('^%[%[[^|]+|[^%]]+%]%]') then -- if <first_text> is a piped link
local rest = mw.ustring.sub(s, letterpos + 1)
upcased = mw.ustring.match (s, '^%[%[[^|]+|%W*(%w)'); -- get first letter character
return s1 .. first .. mw.ustring.upper(letter) .. rest
upcased = mw.ustring.upper (upcased); -- upcase first letter character
else
s = mw.ustring.gsub (s, '^(%[%[[^|]+|%W*)%w', '%1' .. upcased); -- replace
return s1 .. s
else -- here when <first_text> is a wikilink but not a piped link
upcased = mw.ustring.match (s, '^%[%[%W*%w'); -- get '[[' and first letter
upcased = mw.ustring.upper (upcased); -- upcase first letter character
s = mw.ustring.gsub (s, '^%[%[%W*%w', upcased); -- replace; no capture needed here
end

elseif s:match ('^%[%S+%s+[^%]]+%]') then -- if <s> is a ext link of some sort; must have label text
upcased = mw.ustring.match (s, '^%[%S+%s+%W*(%w)'); -- get first letter character
upcased = mw.ustring.upper (upcased); -- upcase first letter character
s = mw.ustring.gsub (s, '^(%[%S+%s+%W*)%w', '%1' .. upcased); -- replace
elseif s:match ('^%[%S+%s*%]') then -- if <s> is a ext link without label text; nothing to do
return s1 .. s; -- reattach prefix string (if present) and done

else -- <s> is not a wikilink or ext link; assume plain text
upcased = mw.ustring.match (s, '^%W*%w'); -- get the first letter character
upcased = mw.ustring.upper (upcased); -- upcase first letter character
s = mw.ustring.gsub (s, '^%W*%w', upcased); -- replace; no capture needed here
end
end

return s1 .. s; -- reattach prefix string (if present) and done
end
end



p.title = function (frame )
p.title = function (frame )
Line 63: Line 100:
local words = mw.text.split( s, " ")
local words = mw.text.split( s, " ")
for i, s in ipairs(words) do
for i, s in ipairs(words) do
s = string.lower( s )
-- {{lc:}} is strip-marker safe, string.lower is not.
s = frame:callParserFunction('lc', s)
if( i > 1 and alwayslower[s] == 1) then
if i == 1 or alwayslower[s] ~= 1 then
-- leave in lowercase
else
s = mw.getContentLanguage():ucfirst(s)
s = mw.getContentLanguage():ucfirst(s)
end
end
Line 83: Line 119:
if sep == "" then sep = ", " end
if sep == "" then sep = ", " end
local pattern = ".*" .. sep .. "(.*)"
local pattern = ".*" .. sep .. "(.*)"
a, b, last = s:find(pattern)
local a, b, last = s:find(pattern)
if a then
if a then
return last
return last
Line 105: Line 141:
local str = mw.text.trim(frame.args[1] or "")
local str = mw.text.trim(frame.args[1] or "")
return mw.text.nowiki(str)
return mw.text.nowiki(str)
end

-- posnq (position, no quotes) returns the numerical start position of the first occurrence
-- of one piece of text ("match") inside another ("str").
-- It returns nil if no match is found, or if either parameter is blank.
-- It takes the text to be searched in as the first unnamed parameter, which is trimmed.
-- It takes the text to match as the second unnamed parameter, which is trimmed and
-- any double quotes " are stripped out.
p.posnq = function(frame)
local args = frame.args
local pargs = frame:getParent().args
for k, v in pairs(pargs) do
args[k] = v
end
local str = mw.text.trim(args[1] or args.source or "")
local match = mw.text.trim(args[2] or args.target or ""):gsub('"', '')
if str == "" or match == "" then return nil end
local plain = mw.text.trim(args[3] or args.plain or "")
if plain == "false" then plain = false else plain = true end
local nomatch = mw.text.trim(args[4] or args.nomatch or "")
-- just take the start position
local pos = mw.ustring.find(str, match, 1, plain) or nomatch
return pos
end
end


Line 200: Line 213:
if plain:sub(1, 1) == "f" then plain = false else plain = true end
if plain:sub(1, 1) == "f" then plain = false else plain = true end
-- get the page content and look for 'text' - return position or nomatch
-- get the page content and look for 'text' - return position or nomatch
content = titleobj:getContent()
local content = titleobj and titleobj:getContent()
return mw.ustring.find(content, text, 1, plain) or nomatch -- returns multiple values
return content and mw.ustring.find(content, text, 1, plain) or nomatch
end
end
p.findpagetext = function(frame)
p.findpagetext = function(frame)
Line 212: Line 225:
-- just the first value
-- just the first value
return (p._findpagetext(args))
return (p._findpagetext(args))
end

-- returns the decoded url. Inverse of parser function {{urlencode:val|TYPE}}
-- Type is:
-- QUERY decodes + to space (default)
-- PATH does no extra decoding
-- WIKI decodes _ to space
p._urldecode = function(url, type)
url = url or ""
type = (type == "PATH" or type == "WIKI") and type
return mw.uri.decode( url, type )
end
-- {{#invoke:String2|urldecode|url=url|type=type}}
p.urldecode = function(frame)
return mw.uri.decode( frame.args.url, frame.args.type )
end
end


Line 223: Line 251:


-- Escape Pattern helper function so that all characters are treated as plain text, as per Module:String
-- Escape Pattern helper function so that all characters are treated as plain text, as per Module:String
function p._escapePattern( pattern_str)
function p._escapePattern( pattern_str )
return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" );
return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" )
end
end


Line 230: Line 258:
p._getBoolean = p._GetParameters.getBoolean
p._getBoolean = p._GetParameters.getBoolean


--[[
--[[
Strip
Strip


Line 243: Line 271:
plain: A flag indicating that the chars should be understood as plain text. defaults to true.
plain: A flag indicating that the chars should be understood as plain text. defaults to true.


Leading and trailing whitespace is also automatically stripped from the string.
Leading and trailing whitespace is also automatically stripped from the string.
]]
]]
function p.strip( frame )
function p.strip( frame )
local new_args = p._getParameters( frame.args, {'source', 'chars', 'plain'} )
local new_args = p._getParameters( frame.args, {'source', 'chars', 'plain'} )
local source_str = new_args['source'] or '';
local source_str = new_args['source'] or ''
local chars = new_args['chars'] or '' or 'characters';
local chars = new_args['chars'] or '' or 'characters'
source_str = mw.text.trim(source_str);
source_str = mw.text.trim(source_str)
if source_str == '' or chars == '' then
if source_str == '' or chars == '' then
return source_str;
return source_str
end
end
local l_plain = p._getBoolean( new_args['plain'] or true );
local l_plain = p._getBoolean( new_args['plain'] or true )
if l_plain then
if l_plain then
chars = p._escapePattern( chars );
chars = p._escapePattern( chars )
end
end
local result;
local result
result = mw.ustring.gsub(source_str, "["..chars.."]", '')
result = mw.ustring.gsub(source_str, "["..chars.."]", '')
return result;
return result
end

--[[
Match any
Returns the index of the first given pattern to match the input. Patterns must be consecutively numbered.
Returns the empty string if nothing matches for use in {{#if:}}

Usage:
{{#invoke:String2|matchAll|source=123 abc|456|abc}} returns '2'.

Parameters:
source: the string to search
plain: A flag indicating that the patterns should be understood as plain text. defaults to true.
1, 2, 3, ...: the patterns to search for
]]
function p.matchAny(frame)
local source_str = frame.args['source'] or error('The source parameter is mandatory.')
local l_plain = p._getBoolean( frame.args['plain'] or true )
for i = 1, math.huge do
local pattern = frame.args[i]
if not pattern then return '' end
if mw.ustring.find(source_str, pattern, 1, l_plain) then
return tostring(i)
end
end
end

--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------

Converts a hyphen to a dash under certain conditions. The hyphen must separate
like items; unlike items are returned unmodified. These forms are modified:
letter - letter (A - B)
digit - digit (4-5)
digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
letterdigit - letterdigit (A1-A5) (an optional separator between letter and
digit is supported – a.1-a.5 or a-1-a-5)
digitletter - digitletter (5a - 5d) (an optional separator between letter and
digit is supported – 5.a-5.d or 5-a-5-d)

any other forms are returned unmodified.

str may be a comma- or semicolon-separated list

]]
function p.hyphen_to_dash( str, spacing )
if (str == nil or str == '') then
return str
end

local accept

str = mw.text.decode(str, true ) -- replace html entities with their characters; semicolon mucks up the text.split

local out = {}
local list = mw.text.split (str, '%s*[,;]%s*') -- split str at comma or semicolon separators if there are any

for _, item in ipairs (list) do -- for each item in the list
item = mw.text.trim(item) -- trim whitespace
item, accept = item:gsub ('^%(%((.+)%)%)$', '%1')
if accept == 0 and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then -- if a hyphenated range or has endash or emdash separators
if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or -- letterdigit hyphen letterdigit (optional separator between letter and digit)
item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or -- digitletter hyphen digitletter (optional separator between digit and letter)
item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or -- digit separator digit hyphen digit separator digit
item:match ('^%d+%s*%-%s*%d+$') or -- digit hyphen digit
item:match ('^%a+%s*%-%s*%a+$') then -- letter hyphen letter
item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2') -- replace hyphen, remove extraneous space characters
else
item = mw.ustring.gsub (item, '%s*[–—]%s*', '–') -- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
end
end
table.insert (out, item) -- add the (possibly modified) item to the output table
end

local temp_str = table.concat (out, ',' .. spacing) -- concatenate the output table into a comma separated string
temp_str, accept = temp_str:gsub ('^%(%((.+)%)%)$', '%1') -- remove accept-this-as-written markup when it wraps all of concatenated out
if accept ~= 0 then
temp_str = str:gsub ('^%(%((.+)%)%)$', '%1') -- when global markup removed, return original str; do it this way to suppress boolean second return value
end
return temp_str
end

function p.hyphen2dash( frame )
local str = frame.args[1] or ''
local spacing = frame.args[2] or ' ' -- space is part of the standard separator for normal spacing (but in conjunction with templates r/rp/ran we may need a narrower spacing

return p.hyphen_to_dash(str, spacing)
end

-- Similar to [[Module:String#endswith]]
function p.startswith(frame)
return (frame.args[1]:sub(1, frame.args[2]:len()) == frame.args[2]) and 'yes' or ''
end

-- Implements [[Template:Isnumeric]]
function p.isnumeric(frame)
local s = frame.args[1] or frame:getParent().args[1]
local boolean = (frame.args.boolean or frame:getParent().args.boolean) == 'true'
if type(s) == 'string' and mw.getContentLanguage():parseFormattedNumber( s ) then
return boolean and 1 or s
end
return boolean and 0 or ''
end
end



Latest revision as of 15:35, 20 September 2024

Documentation for this module may be created at Module:String2/doc

require ('strict');
local p = {}

p.trim = function(frame)
	return mw.text.trim(frame.args[1] or "")
end

p.sentence = function (frame)
	-- {{lc:}} is strip-marker safe, string.lower is not.
	frame.args[1] = frame:callParserFunction('lc', frame.args[1])
	return p.ucfirst(frame)
end

p.ucfirst = function (frame )
	local s = frame.args[1];
	if not s or '' == s or s:match ('^%s+$') then								-- when <s> is nil, empty, or only whitespace
		return s;																-- abandon because nothing to do
	end

	s =  mw.text.trim( frame.args[1] or "" )
	local s1 = ""

	local prefix_patterns_t = {													-- sequence of prefix patterns
		'^\127[^\127]*UNIQ%-%-%a+%-%x+%-QINU[^\127]*\127',						-- stripmarker
		'^([%*;:#]+)',															-- various list markup
		'^(\'\'\'*)',															-- bold / italic markup
		'^(%b<>)',																-- html-like tags because some templates render these
		'^(&%a+;)',																-- html character entities because some templates render these
		'^(&#%d+;)',															-- html numeric (decimal) entities because some templates render these
		'^(&#x%x+;)',															-- html numeric (hexadecimal) entities because some templates render these
		'^(%s+)',																-- any whitespace characters
		'^([%(%)%-%+%?%.%%!~!@%$%^&_={}/`,‘’„“”ʻ|\"\'\\]+)',					-- miscellaneous punctuation
		}
	
	local prefixes_t = {};														-- list, bold/italic, and html-like markup, & whitespace saved here

	local function prefix_strip (s)												-- local function to strip prefixes from <s>
		for _, pattern in ipairs (prefix_patterns_t) do							-- spin through <prefix_patterns_t> 
			if s:match (pattern) then											-- when there is a match
				local prefix = s:match (pattern);								-- get a copy of the matched prefix
				table.insert (prefixes_t, prefix);								-- save it
				s = s:sub (prefix:len() + 1);									-- remove the prefix from <s>
				return s, true;													-- return <s> without prefix and flag; force restart at top of sequence because misc punct removal can break stripmarker
			end
		end
		return s;																-- no prefix found; return <s> with nil flag
	end

	local prefix_removed;														-- flag; boolean true as long as prefix_strip() finds and removes a prefix
	
	repeat																		-- one by one remove list, bold/italic, html-like markup, whitespace, etc from start of <s>
		s, prefix_removed = prefix_strip (s);
	until (not prefix_removed);													-- until <prefix_removed> is nil

	s1 = table.concat (prefixes_t);												-- recreate the prefix string for later reattachment

	local first_text = mw.ustring.match (s, '^%[%[[^%]]+%]%]');					-- extract wikilink at start of string if present; TODO: this can be string.match()?

	local upcased;
	if first_text then
		if first_text:match ('^%[%[[^|]+|[^%]]+%]%]') then						-- if <first_text> is a piped link
			upcased = mw.ustring.match (s, '^%[%[[^|]+|%W*(%w)');				-- get first letter character
			upcased = mw.ustring.upper (upcased);								-- upcase first letter character
			s = mw.ustring.gsub (s, '^(%[%[[^|]+|%W*)%w', '%1' .. upcased);		-- replace
		else																	-- here when <first_text> is a wikilink but not a piped link
			upcased = mw.ustring.match (s, '^%[%[%W*%w');						-- get '[[' and first letter
			upcased = mw.ustring.upper (upcased);								-- upcase first letter character
			s = mw.ustring.gsub (s, '^%[%[%W*%w', upcased);						-- replace; no capture needed here
		end

	elseif s:match ('^%[%S+%s+[^%]]+%]') then									-- if <s> is a ext link of some sort; must have label text
		upcased = mw.ustring.match (s, '^%[%S+%s+%W*(%w)');						-- get first letter character
		upcased = mw.ustring.upper (upcased);									-- upcase first letter character
		s = mw.ustring.gsub (s, '^(%[%S+%s+%W*)%w', '%1' .. upcased);			-- replace
	
	elseif s:match ('^%[%S+%s*%]') then											-- if <s> is a ext link without label text; nothing to do
		return s1 .. s;															-- reattach prefix string (if present) and done

	else																		-- <s> is not a wikilink or ext link; assume plain text
		upcased = mw.ustring.match (s, '^%W*%w');								-- get the first letter character
		upcased = mw.ustring.upper (upcased);									-- upcase first letter character
		s = mw.ustring.gsub (s, '^%W*%w', upcased);								-- replace; no capture needed here
	end

	return s1 .. s;																-- reattach prefix string (if present) and done
end


p.title = function (frame )
	-- http://grammar.yourdictionary.com/capitalization/rules-for-capitalization-in-titles.html
	-- recommended by The U.S. Government Printing Office Style Manual:
	-- "Capitalize all words in titles of publications and documents,
	-- except a, an, the, at, by, for, in, of, on, to, up, and, as, but, or, and nor."
	local alwayslower = {['a'] = 1, ['an'] = 1, ['the'] = 1,
		['and'] = 1, ['but'] = 1, ['or'] = 1, ['for'] = 1,
		['nor'] = 1, ['on'] = 1, ['in'] = 1, ['at'] = 1, ['to'] = 1,
		['from'] = 1, ['by'] = 1, ['of'] = 1, ['up'] = 1 }
	local res = ''
	local s =  mw.text.trim( frame.args[1] or "" )
	local words = mw.text.split( s, " ")
	for i, s in ipairs(words) do
		-- {{lc:}} is strip-marker safe, string.lower is not.
		s = frame:callParserFunction('lc', s)
		if i == 1 or alwayslower[s] ~= 1 then
			s = mw.getContentLanguage():ucfirst(s)
		end
		words[i] = s
	end
	return table.concat(words, " ")
end

-- findlast finds the last item in a list
-- the first unnamed parameter is the list
-- the second, optional unnamed parameter is the list separator (default = comma space)
-- returns the whole list if separator not found
p.findlast = function(frame)
	local s =  mw.text.trim( frame.args[1] or "" )
	local sep = frame.args[2] or ""
	if sep == "" then sep = ", " end
	local pattern = ".*" .. sep .. "(.*)"
	local a, b, last = s:find(pattern)
	if a then
		return last
	else
		return s
	end
end

-- stripZeros finds the first number and strips leading zeros (apart from units)
-- e.g "0940" -> "940"; "Year: 0023" -> "Year: 23"; "00.12" -> "0.12"
p.stripZeros = function(frame)
	local s = mw.text.trim(frame.args[1] or "")
	local n = tonumber( string.match( s, "%d+" ) ) or ""
	s = string.gsub( s, "%d+", n, 1 )
	return s
end

-- nowiki ensures that a string of text is treated by the MediaWiki software as just a string
-- it takes an unnamed parameter and trims whitespace, then removes any wikicode
p.nowiki = function(frame)
	local str = mw.text.trim(frame.args[1] or "")
	return mw.text.nowiki(str)
end

-- split splits text at boundaries specified by separator
-- and returns the chunk for the index idx (starting at 1)
-- #invoke:String2 |split |text |separator |index |true/false
-- #invoke:String2 |split |txt=text |sep=separator |idx=index |plain=true/false
-- if plain is false/no/0 then separator is treated as a Lua pattern - defaults to plain=true
p.split = function(frame)
	local args = frame.args
	if not(args[1] or args.txt) then args = frame:getParent().args end
	local txt = args[1] or args.txt or ""
	if txt == "" then return nil end
	local sep = (args[2] or args.sep or ""):gsub('"', '')
	local idx = tonumber(args[3] or args.idx) or 1
	local plain = (args[4] or args.plain or "true"):sub(1,1)
	plain = (plain ~= "f" and plain ~= "n" and plain ~= "0")
	local splittbl = mw.text.split( txt, sep, plain )
	if idx < 0 then idx = #splittbl + idx + 1 end
	return splittbl[idx]
end

-- val2percent scans through a string, passed as either the first unnamed parameter or |txt=
-- it converts each number it finds into a percentage and returns the resultant string.
p.val2percent = function(frame)
	local args = frame.args
	if not(args[1] or args.txt) then args = frame:getParent().args end
	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then return nil end
	local function v2p (x)
		x = (tonumber(x) or 0) * 100
		if x == math.floor(x) then x = math.floor(x) end
		return x .. "%"
	end
	txt = txt:gsub("%d[%d%.]*", v2p) -- store just the string
	return txt
end

-- one2a scans through a string, passed as either the first unnamed parameter or |txt=
-- it converts each occurrence of 'one ' into either 'a ' or 'an ' and returns the resultant string.
p.one2a = function(frame)
	local args = frame.args
	if not(args[1] or args.txt) then args = frame:getParent().args end
	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then return nil end
	txt = txt:gsub(" one ", " a "):gsub("^one", "a"):gsub("One ", "A "):gsub("a ([aeiou])", "an %1"):gsub("A ([aeiou])", "An %1")
	return txt
end

-- findpagetext returns the position of a piece of text in a page
-- First positional parameter or |text is the search text
-- Optional parameter |title is the page title, defaults to current page
-- Optional parameter |plain is either true for plain search (default) or false for Lua pattern search
-- Optional parameter |nomatch is the return value when no match is found; default is nil
p._findpagetext = function(args)
	-- process parameters
	local nomatch = args.nomatch or ""
	if nomatch == "" then nomatch = nil end
	--
	local text = mw.text.trim(args[1] or args.text or "")
	if text == "" then return nil end
	--
	local title = args.title or ""
	local titleobj
	if title == "" then
		titleobj = mw.title.getCurrentTitle()
	else
		titleobj = mw.title.new(title)
	end
	--
	local plain = args.plain or ""
	if plain:sub(1, 1) == "f" then plain = false else plain = true end
	-- get the page content and look for 'text' - return position or nomatch
	local content = titleobj and titleobj:getContent()
	return content and mw.ustring.find(content, text, 1, plain) or nomatch
end
p.findpagetext = function(frame)
	local args = frame.args
	local pargs = frame:getParent().args
	for k, v in pairs(pargs) do
		args[k] = v
	end
	if not (args[1] or args.text) then return nil end
	-- just the first value
	return (p._findpagetext(args))
end

-- returns the decoded url. Inverse of parser function {{urlencode:val|TYPE}}
-- Type is:
-- QUERY decodes + to space (default)
-- PATH does no extra decoding
-- WIKI decodes _ to space
p._urldecode = function(url, type)
	url = url or ""
	type = (type == "PATH" or type == "WIKI") and type
	return mw.uri.decode( url, type )
end
-- {{#invoke:String2|urldecode|url=url|type=type}}
p.urldecode = function(frame)
	return mw.uri.decode( frame.args.url, frame.args.type )
end

-- what follows was merged from Module:StringFunc

-- helper functions
p._GetParameters = require('Module:GetParameters')

-- Argument list helper function, as per Module:String
p._getParameters = p._GetParameters.getParameters

-- Escape Pattern helper function so that all characters are treated as plain text, as per Module:String
function p._escapePattern( pattern_str )
	return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" )
end

-- Helper Function to interpret boolean strings, as per Module:String
p._getBoolean = p._GetParameters.getBoolean

--[[
Strip

This function Strips characters from string

Usage:
{{#invoke:String2|strip|source_string|characters_to_strip|plain_flag}}

Parameters
	source: The string to strip
	chars:  The pattern or list of characters to strip from string, replaced with ''
	plain:  A flag indicating that the chars should be understood as plain text. defaults to true.

Leading and trailing whitespace is also automatically stripped from the string.
]]
function p.strip( frame )
	local new_args = p._getParameters( frame.args,  {'source', 'chars', 'plain'} )
	local source_str = new_args['source'] or ''
	local chars = new_args['chars'] or '' or 'characters'
	source_str = mw.text.trim(source_str)
	if source_str == '' or chars == '' then
		return source_str
	end
	local l_plain = p._getBoolean( new_args['plain'] or true )
	if l_plain then
		chars = p._escapePattern( chars )
	end
	local result
	result = mw.ustring.gsub(source_str, "["..chars.."]", '')
	return result
end

--[[
Match any
Returns the index of the first given pattern to match the input. Patterns must be consecutively numbered.
Returns the empty string if nothing matches for use in {{#if:}}

Usage:
	{{#invoke:String2|matchAll|source=123 abc|456|abc}} returns '2'.

Parameters:
	source: the string to search
	plain:  A flag indicating that the patterns should be understood as plain text. defaults to true.
	1, 2, 3, ...: the patterns to search for
]]
function p.matchAny(frame)
	local source_str = frame.args['source'] or error('The source parameter is mandatory.')
	local l_plain = p._getBoolean( frame.args['plain'] or true )
	for i = 1, math.huge do
		local pattern = frame.args[i]
		if not pattern then return '' end
		if mw.ustring.find(source_str, pattern, 1, l_plain) then
			return tostring(i)
		end
	end
end

--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------

Converts a hyphen to a dash under certain conditions.  The hyphen must separate
like items; unlike items are returned unmodified.  These forms are modified:
	letter - letter (A - B)
	digit - digit (4-5)
	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
	letterdigit - letterdigit (A1-A5) (an optional separator between letter and
		digit is supported – a.1-a.5 or a-1-a-5)
	digitletter - digitletter (5a - 5d) (an optional separator between letter and
		digit is supported – 5.a-5.d or 5-a-5-d)

any other forms are returned unmodified.

str may be a comma- or semicolon-separated list

]]
function p.hyphen_to_dash( str, spacing )
	if (str == nil or str == '') then
		return str
	end

	local accept

	str = mw.text.decode(str, true )											-- replace html entities with their characters; semicolon mucks up the text.split

	local out = {}
	local list = mw.text.split (str, '%s*[,;]%s*')								-- split str at comma or semicolon separators if there are any

	for _, item in ipairs (list) do												-- for each item in the list
		item = mw.text.trim(item)												-- trim whitespace
		item, accept = item:gsub ('^%(%((.+)%)%)$', '%1')
		if accept == 0 and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators
			if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or			-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or			-- digitletter hyphen digitletter (optional separator between digit and letter)
				item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or			-- digit separator digit hyphen digit separator digit
				item:match ('^%d+%s*%-%s*%d+$') or								-- digit hyphen digit
				item:match ('^%a+%s*%-%s*%a+$') then							-- letter hyphen letter
					item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2')	-- replace hyphen, remove extraneous space characters
			else
				item = mw.ustring.gsub (item, '%s*[–—]%s*', '–')				-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
			end
		end
		table.insert (out, item)												-- add the (possibly modified) item to the output table
	end

	local temp_str = table.concat (out, ',' .. spacing)							-- concatenate the output table into a comma separated string
	temp_str, accept = temp_str:gsub ('^%(%((.+)%)%)$', '%1')					-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept ~= 0 then
		temp_str = str:gsub ('^%(%((.+)%)%)$', '%1')							-- when global markup removed, return original str; do it this way to suppress boolean second return value
	end
	return temp_str
end

function p.hyphen2dash( frame )
	local str = frame.args[1] or ''
	local spacing = frame.args[2] or ' ' -- space is part of the standard separator for normal spacing (but in conjunction with templates r/rp/ran we may need a narrower spacing

	return p.hyphen_to_dash(str, spacing)
end

-- Similar to [[Module:String#endswith]]
function p.startswith(frame)
	return (frame.args[1]:sub(1, frame.args[2]:len()) == frame.args[2]) and 'yes' or ''
end

-- Implements [[Template:Isnumeric]]
function p.isnumeric(frame)
	local s = frame.args[1] or frame:getParent().args[1]
	local boolean = (frame.args.boolean or frame:getParent().args.boolean) == 'true'
	if type(s) == 'string' and mw.getContentLanguage():parseFormattedNumber( s ) then
		return boolean and 1 or s
	end
	return boolean and 0 or ''
end

return p