Module:Scripts/findBestScript

From Imperivm Romanvm
< Module:Scripts
Revision as of 13:48, 26 January 2022 by wiktionary>Surjection
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Documentation for this module may be created at Module:Scripts/findBestScript/doc

-- Pick the script that best matches `text`.
--
-- Parameters:
--   export      - table providing getByCode(code); used to obtain the "None" script.
--   text        - string to examine.
--   lang        - not used by this function.
--   scripts     - sequence of script objects, each exposing a
--                 countCharacters(text) method that returns a number.
--   forceDetect - not used by this function.
--
-- Returns:
--   * the first script whose count is at least the number of characters in
--     the text (ignoring whitespace and punctuation); otherwise
--   * the script with the highest non-zero count (the earliest one wins ties);
--     otherwise
--   * export.getByCode("None") when the text has no countable characters or
--     no script matched anything.
return function (export, text, lang, scripts, forceDetect)
	--[=[
		Remove any HTML entities; catfix function in [[Module:utilities]]
		adds tagging to a no-break space (&nbsp;), which contains Latin characters;
		hence Latin was returned as the script if "Latn" is one of the language's scripts.
		Numeric character references (&#160;, &#xA0;) are stripped as well:
		their digits and the "x" marker would otherwise be counted as text.
	]=]
	text = string.gsub(text, "&#?[a-zA-Z0-9]+;", "")
	
	-- Get length of text minus any spacing or punctuation characters.
	-- Counting instances of UTF-8 character pattern is faster than mw.ustring.len.
	local stripped = mw.ustring.gsub(text, "[%s%p]+", "")
	local _, length = string.gsub(stripped, "[\1-\127\194-\244][\128-\191]*", "")
	
	if length == 0 then
		return export.getByCode("None")
	end
	
	-- Try to match every script against the text,
	-- and keep the one with the most matching characters.
	local bestcount = 0
	local bestscript = nil
	
	for _, script in ipairs(scripts) do
		local count = script:countCharacters(text)
		
		-- Every counted character belongs to this script: no need to look further.
		if count >= length then
			return script
		end
		
		-- Strict comparison: scripts with zero matches are never selected,
		-- and the earliest script wins a tie.
		if count > bestcount then
			bestcount = count
			bestscript = script
		end
	end
	
	-- Fall back to "None" when no script matched any character.
	return bestscript or export.getByCode("None")
end