Module:String2

Documentation for this module may be created at Module:String2/doc

require ('strict');

local p = {}


-- Upcase the text matched by <pattern> at the start of <s>.
-- When <pattern> has two captures (lead, letter) only the second is upcased;
-- when it has no captures the whole match is upcased.
-- A function replacement is used so that the result is inserted verbatim: a '%'
-- in the matched text can never be misread as a capture reference.
-- Returns <s> unchanged when <pattern> does not match.
local function upcase_first (s, pattern)
	return (mw.ustring.gsub (s, pattern, function (lead, letter)
		if letter then
			return lead .. mw.ustring.upper (letter);
		end
		return mw.ustring.upper (lead);											-- no captures: <lead> is the whole match
	end));
end


-- trim returns the first unnamed parameter with leading and trailing whitespace removed
p.trim = function(frame)
	return mw.text.trim(frame.args[1] or "")
end


-- sentence lowercases the first unnamed parameter and then upcases its first letter (see p.ucfirst)
p.sentence = function (frame)
	-- {{lc:}} is strip-marker safe, string.lower is not.
	frame.args[1] = frame:callParserFunction('lc', frame.args[1])
	return p.ucfirst(frame)
end


-- ucfirst upcases the first letter character of the first unnamed parameter.
-- Leading strip markers, list / bold / italic markup, html-like tags, html entities,
-- whitespace and miscellaneous punctuation are set aside first and reattached afterwards.
-- If the remaining text starts with a wikilink or a labelled external link, the first
-- letter of the displayed text is upcased.
-- Returns the parameter unchanged when it is nil, empty, or only whitespace.
p.ucfirst = function (frame )
	local s = frame.args[1];
	if not s or '' == s or s:match ('^%s+$') then								-- when <s> is nil, empty, or only whitespace
		return s;																-- abandon because nothing to do
	end

	s = mw.text.trim (s)

	local prefix_patterns_t = {													-- sequence of prefix patterns
		'^\127[^\127]*UNIQ%-%-%a+%-%x+%-QINU[^\127]*\127',						-- stripmarker
		'^([%*;:#]+)',															-- various list markup
		'^(\'\'\'*)',															-- bold / italic markup
		'^(%b<>)',																-- html-like tags because some templates render these
		'^(&%a+;)',																-- html character entities because some templates render these
		'^(&#%d+;)',															-- html numeric (decimal) entities because some templates render these
		'^(&#x%x+;)',															-- html numeric (hexadecimal) entities because some templates render these
		'^(%s+)',																-- any whitespace characters
		'^([%(%)%-%+%?%.%%!~!@%$%^&_={}/`,‘’„“”ʻ|\"\'\\]+)',					-- miscellaneous punctuation
	}

	local prefixes_t = {};														-- list, bold/italic, and html-like markup, & whitespace saved here

	local function prefix_strip (s)												-- local function to strip prefixes from <s>
		for _, pattern in ipairs (prefix_patterns_t) do							-- spin through <prefix_patterns_t>
			-- mw.ustring.match, not string.match: the punctuation class holds multi-byte
			-- characters, and a byte-wise match could cut a UTF-8 character in half
			local prefix = mw.ustring.match (s, pattern);						-- get a copy of the matched prefix (nil when no match)
			if prefix then
				table.insert (prefixes_t, prefix);								-- save it
				s = s:sub (prefix:len() + 1);									-- remove the prefix from <s> (byte length of the matched text)
				return s, true;													-- return <s> without prefix and flag; force restart at top of sequence because misc punct removal can break stripmarker
			end
		end
		return s;																-- no prefix found; return <s> with nil flag
	end

	local prefix_removed;														-- flag; boolean true as long as prefix_strip() finds and removes a prefix

	repeat																		-- one by one remove list, bold/italic, html-like markup, whitespace, etc from start of <s>
		s, prefix_removed = prefix_strip (s);
	until (not prefix_removed);													-- until <prefix_removed> is nil

	local s1 = table.concat (prefixes_t);										-- recreate the prefix string for later reattachment

	local first_text = mw.ustring.match (s, '^%[%[[^%]]+%]%]');					-- extract wikilink at start of string if present; TODO: this can be string.match()?

	if first_text then
		if first_text:match ('^%[%[[^|]+|[^%]]+%]%]') then						-- if <first_text> is a piped link
			s = upcase_first (s, '^(%[%[[^|]+|%W*)(%w)');						-- upcase first letter character of the label
		else																	-- here when <first_text> is a wikilink but not a piped link
			s = upcase_first (s, '^%[%[%W*%w');									-- upcase '[[' through first letter; no capture needed here
		end

	elseif s:match ('^%[%S+%s+[^%]]+%]') then									-- if <s> is a ext link of some sort; must have label text
		s = upcase_first (s, '^(%[%S+%s+%W*)(%w)');								-- upcase first letter character of the label

	elseif s:match ('^%[%S+%s*%]') then											-- if <s> is a ext link without label text; nothing to do
		return s1 .. s;															-- reattach prefix string (if present) and done

	else																		-- <s> is not a wikilink or ext link; assume plain text
		s = upcase_first (s, '^%W*%w');											-- upcase through the first letter character; no capture needed here
	end

	return s1 .. s;																-- reattach prefix string (if present) and done
end


-- title converts the first unnamed parameter to title case: every space-separated word is
-- lowercased, then the first word and every word not listed in <alwayslower> gets an
-- upcased first letter.
p.title = function (frame )
	-- http://grammar.yourdictionary.com/capitalization/rules-for-capitalization-in-titles.html
	-- recommended by The U.S. Government Printing Office Style Manual:
	-- "Capitalize all words in titles of publications and documents,
	-- except a, an, the, at, by, for, in, of, on, to, up, and, as, but, or, and nor."
	-- NOTE: the table below differs from that quotation: it includes 'from' and omits 'as'.
	local alwayslower = {['a'] = 1, ['an'] = 1, ['the'] = 1,
		['and'] = 1, ['but'] = 1, ['or'] = 1, ['for'] = 1,
		['nor'] = 1, ['on'] = 1, ['in'] = 1, ['at'] = 1, ['to'] = 1,
		['from'] = 1, ['by'] = 1, ['of'] = 1, ['up'] = 1 }
	local s = mw.text.trim( frame.args[1] or "" )
	local words = mw.text.split( s, " ")
	local lang = mw.getContentLanguage()
	for i, word in ipairs(words) do
		-- {{lc:}} is strip-marker safe, string.lower is not.
		word = frame:callParserFunction('lc', word)
		if i == 1 or alwayslower[word] ~= 1 then
			word = lang:ucfirst(word)
		end
		words[i] = word
	end
	return table.concat(words, " ")
end


-- findlast finds the last item in a list
-- the first unnamed parameter is the list
-- the second, optional unnamed parameter is the list separator (default = comma space)
-- the separator is inserted into a Lua pattern as-is, so pattern magic characters in it are active
-- returns the whole list if separator not found
p.findlast = function(frame)
	local s = mw.text.trim( frame.args[1] or "" )
	local sep = frame.args[2] or ""
	if sep == "" then sep = ", " end
	local pattern = ".*" .. sep .. "(.*)"										-- greedy '.*' pushes the match to the last separator
	local a, b, last = s:find(pattern)
	if a then
		return last
	else
		return s
	end
end


-- stripZeros finds the first number and strips leading zeros (apart from units)
-- e.g "0940" -> "940"; "Year: 0023" -> "Year: 23"; "00.12" -> "0.12"
p.stripZeros = function(frame)
	local s = mw.text.trim(frame.args[1] or "")
	-- Strip the zeros textually rather than through tonumber(): a long digit run would
	-- otherwise come back in exponent notation. '%d+' keeps at least one digit, so "000" -> "0".
	s = string.gsub( s, "0*(%d+)", "%1", 1 )									-- store just the string, not the match count
	return s
end


-- nowiki ensures that a string of text is treated by the MediaWiki software as just a string
-- it takes an unnamed parameter and trims whitespace, then removes any wikicode
p.nowiki = function(frame)
	local str = mw.text.trim(frame.args[1] or "")
	return mw.text.nowiki(str)
end


-- split splits text at boundaries specified by separator
-- and returns the chunk for the index idx (starting at 1)
-- a negative idx counts from the end of the list of chunks
-- #invoke:String2 |split |text |separator |index |true/false
-- #invoke:String2 |split |txt=text |sep=separator |idx=index |plain=true/false
-- if plain is false/no/0 then separator is treated as a Lua pattern - defaults to plain=true
p.split = function(frame)
	local args = frame.args
	if not(args[1] or args.txt) then args = frame:getParent().args end			-- fall back to the calling template's parameters
	local txt = args[1] or args.txt or ""
	if txt == "" then return nil end
	local sep = (args[2] or args.sep or ""):gsub('"', '')						-- double quotes are removed from the separator
	local idx = tonumber(args[3] or args.idx) or 1
	local plain = (args[4] or args.plain or "true"):sub(1,1)
	plain = (plain ~= "f" and plain ~= "n" and plain ~= "0")
	local splittbl = mw.text.split( txt, sep, plain )
	if idx < 0 then idx = #splittbl + idx + 1 end
	return splittbl[idx]
end


-- val2percent scans through a string, passed as either the first unnamed parameter or |txt=
-- it converts each number it finds into a percentage and returns the resultant string.
p.val2percent = function(frame)
	local args = frame.args
	if not(args[1] or args.txt) then args = frame:getParent().args end
	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then return nil end
	local function v2p (x)
		x = (tonumber(x) or 0) * 100
		if x == math.floor(x) then x = math.floor(x) end
		return x .. "%"
	end
	txt = txt:gsub("%d[%d%.]*", v2p) -- store just the string
	return txt
end


-- one2a scans through a string, passed as either the first unnamed parameter or |txt=
-- it converts each occurrence of 'one ' into either 'a ' or 'an ' and returns the resultant string.
p.one2a = function(frame)
	local args = frame.args
	if not(args[1] or args.txt) then args = frame:getParent().args end
	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then return nil end
	txt = txt:gsub(" one ", " a "):gsub("^one", "a"):gsub("One ", "A "):gsub("a ([aeiou])", "an %1"):gsub("A ([aeiou])", "An %1")
	return txt
end


-- findpagetext returns the position of a piece of text in a page
-- First positional parameter or |text is the search text
-- Optional parameter |title is the page title, defaults to current page
-- Optional parameter |plain is either true for plain search (default) or false for Lua pattern search
-- Optional parameter |nomatch is the return value when no match is found; default is nil
p._findpagetext = function(args)
	-- process parameters
	local nomatch = args.nomatch or ""
	if nomatch == "" then nomatch = nil end
	--
	local text = mw.text.trim(args[1] or args.text or "")
	if text == "" then return nil end
	--
	local title = args.title or ""
	local titleobj
	if title == "" then
		titleobj = mw.title.getCurrentTitle()
	else
		titleobj = mw.title.new(title)
	end
	--
	local plain = args.plain or ""
	if plain:sub(1, 1) == "f" then plain = false else plain = true end
	-- get the page content and look for 'text' - return position or nomatch
	local content = titleobj and titleobj:getContent()
	return content and mw.ustring.find(content, text, 1, plain) or nomatch
end

-- template entry point for _findpagetext; parameters of the calling template override those of the #invoke
p.findpagetext = function(frame)
	local args = frame.args
	local pargs = frame:getParent().args
	for k, v in pairs(pargs) do
		args[k] = v
	end
	if not (args[1] or args.text) then return nil end
	-- just the first value
	return (p._findpagetext(args))
end


-- returns the decoded url. Inverse of parser function {{urlencode:val|TYPE}}
-- Type is:
-- QUERY decodes + to space (default)
-- PATH does no extra decoding
-- WIKI decodes _ to space
p._urldecode = function(url, enctype)
	url = url or ""
	enctype = (enctype == "PATH" or enctype == "WIKI") and enctype				-- anything else becomes false
	return mw.uri.decode( url, enctype )
end

-- {{#invoke:String2|urldecode|url=url|type=type}}
p.urldecode = function(frame)
	return mw.uri.decode( frame.args.url or "", frame.args.type )				-- missing |url= is treated as the empty string
end


-- what follows was merged from Module:StringFunc

-- helper functions
p._GetParameters = require('Module:GetParameters')

-- Argument list helper function, as per Module:String
p._getParameters = p._GetParameters.getParameters

-- Escape Pattern helper function so that all characters are treated as plain text, as per Module:String
-- (returns both results of gsub: the escaped string and the replacement count)
function p._escapePattern( pattern_str )
	return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" )
end

-- Helper Function to interpret boolean strings, as per Module:String
p._getBoolean = p._GetParameters.getBoolean


--[[
Strip

This function Strips characters from string

Usage:
{{#invoke:String2|strip|source_string|characters_to_strip|plain_flag}}

Parameters
	source: The string to strip
	chars:  The pattern or list of characters to strip from string, replaced with ''
	plain:  A flag indicating that the chars should be understood as plain text. defaults to true.

Leading and trailing whitespace is also automatically stripped from the string.
]]
function p.strip( frame )
	local new_args = p._getParameters( frame.args, {'source', 'chars', 'plain'} )
	local source_str = new_args['source'] or ''
	local chars = new_args['chars'] or ''
	source_str = mw.text.trim(source_str)
	if source_str == '' or chars == '' then
		return source_str
	end
	local l_plain = p._getBoolean( new_args['plain'] or true )
	if l_plain then
		chars = p._escapePattern( chars )										-- keeps only the first return value
	end
	local result
	result = mw.ustring.gsub(source_str, "["..chars.."]", '')					-- <chars> is used as a character class
	return result
end


--[[
Match any

Returns the index of the first given pattern to match the input. Patterns must be consecutively numbered.
Returns the empty string if nothing matches for use in {{#if:}}

Usage:
{{#invoke:String2|matchAny|source=123 abc|456|abc}} returns '2'.

Parameters:
	source: the string to search
	plain:  A flag indicating that the patterns should be understood as plain text. defaults to true.
	1, 2, 3, ...: the patterns to search for
]]
function p.matchAny(frame)
	local source_str = frame.args['source'] or error('The source parameter is mandatory.')
	local l_plain = p._getBoolean( frame.args['plain'] or true )
	for i = 1, math.huge do														-- ends at the first missing numbered parameter
		local pattern = frame.args[i]
		if not pattern then return '' end
		if mw.ustring.find(source_str, pattern, 1, l_plain) then
			return tostring(i)
		end
	end
end


--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------

Converts a hyphen to a dash under certain conditions.  The hyphen must separate
like items; unlike items are returned unmodified.  These forms are modified:
	letter - letter (A - B)
	digit - digit (4-5)
	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
	letterdigit - letterdigit (A1-A5) (an optional separator between letter and
		digit is supported – a.1-a.5 or a-1-a-5)
	digitletter - digitletter (5a - 5d) (an optional separator between letter and
		digit is supported – 5.a-5.d or 5-a-5-d)

any other forms are returned unmodified.

str may be a comma- or semicolon-separated list

An item (or the whole of str) wrapped in ((...)) is accepted as written: the markup
is removed and the content is not converted.

spacing is the text placed after each comma when the list is rebuilt; defaults to a single space.

]]
function p.hyphen_to_dash( str, spacing )
	if (str == nil or str == '') then
		return str
	end

	spacing = spacing or ' '													-- same default as p.hyphen2dash; avoids concatenating nil below

	local accept																-- count of accept-this-as-written markup removals

	str = mw.text.decode(str, true )											-- replace html entities with their characters; semicolon mucks up the text.split

	local out = {}
	local list = mw.text.split (str, '%s*[,;]%s*')								-- split str at comma or semicolon separators if there are any

	for _, item in ipairs (list) do												-- for each item in the list
		item = mw.text.trim(item)												-- trim whitespace
		item, accept = item:gsub ('^%(%((.+)%)%)$', '%1')
		if accept == 0 and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators
			if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or			-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or			-- digitletter hyphen digitletter (optional separator between digit and letter)
				item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or			-- digit separator digit hyphen digit separator digit
				item:match ('^%d+%s*%-%s*%d+$') or								-- digit hyphen digit
				item:match ('^%a+%s*%-%s*%a+$') then							-- letter hyphen letter
					item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2')	-- replace hyphen, remove extraneous space characters
			else
				item = mw.ustring.gsub (item, '%s*[–—]%s*', '–')				-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
			end
		end
		table.insert (out, item)												-- add the (possibly modified) item to the output table
	end

	local temp_str = table.concat (out, ',' .. spacing)							-- concatenate the output table into a comma separated string
	temp_str, accept = temp_str:gsub ('^%(%((.+)%)%)$', '%1')					-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept ~= 0 then
		temp_str = str:gsub ('^%(%((.+)%)%)$', '%1')							-- when global markup removed, return original str; do it this way to suppress boolean second return value
	end
	return temp_str
end

-- template entry point for hyphen_to_dash: first unnamed parameter is the text, second is the spacing
function p.hyphen2dash( frame )
	local str = frame.args[1] or ''
	local spacing = frame.args[2] or ' ' -- space is part of the standard separator for normal spacing (but in conjunction with templates r/rp/ran we may need a narrower spacing
	return p.hyphen_to_dash(str, spacing)
end


-- Similar to [[Module:String#endswith]]
-- returns 'yes' when the first unnamed parameter starts with the second, else the empty string
-- missing parameters are treated as empty strings
function p.startswith(frame)
	local text = frame.args[1] or ''
	local prefix = frame.args[2] or ''
	return (text:sub(1, prefix:len()) == prefix) and 'yes' or ''
end


return p