local lpeg = require 'lpeg'
-- Publish every LPeg export whose name starts with an uppercase letter
-- (P, S, R, V, C, Cg, Cb, Cmt, ...) into this chunk's environment so the
-- grammar below can use them without the `lpeg.` prefix.
for name, value in pairs(lpeg) do
    local is_capitalised = type(name) == 'string' and name:match '^%u'
    if is_capitalised then
        _ENV[name] = value
    end
end
-- Any single whitespace character: space, tab, newline, carriage return
-- or vertical tab.
local ws = S ' \t\n\r\v'
-- The single character allowed between two consecutive link templates:
-- a space, a tab or a hyphen.
local link_separator = S ' \t-'
-- Number of times count_up has been called so far.
local count = 0

--- Increment the counter and report success.
-- Always returns true, so when it is used as a match-time (Cmt) callback
-- the enclosing match is never rejected.
-- @treturn boolean always true
local function count_up()
    count = count + 1
    return true
end
require "mediawiki"
local languages, etymology_languages, families =
require "Module:languages",
require "Module:etymology languages",
require "Module:families"
--- Map a code to the code of its non-etymological counterpart.
-- The code is looked up first as a language, then as an etymology
-- language, then as a family; the first hit is passed to
-- languages.getNonEtymological and the code of the result is returned.
-- @tparam string language_code code to resolve
-- @return the resolved code; the falsy result of getNonEtymological if it
--   yields nil/false; or no value at all when the code is unknown to all
--   three lookups
local function get_non_etymological(language_code)
    local object = languages.getByCode(language_code)
        or etymology_languages.getByCode(language_code)
        or families.getByCode(language_code)
    if not object then
        -- Deliberately return *no value* (not nil), exactly like falling
        -- off the end of the function.
        return
    end
    local resolved = languages.getNonEtymological(object)
    return resolved and resolved:getCode()
end
-- Grammar that scans wikitext for runs of link templates sharing one
-- language code and separated by a single space, tab or hyphen, e.g.
--   {{m|en|test}} {{m|en|it}}
-- Matching is anchored at the start of the subject. Each run found is
-- returned as one string capture, and count_up is called once per run.
local find_consecutive_link_templates = P {
    -- Start rule: skip HTML comments and any character at which no run
    -- begins, then capture a run; repeat for as many runs as can be found.
    -- The skip is optional (^0) so that a run at the very beginning of the
    -- subject, or one immediately following the previous run, is found too.
    ((V 'comment' + (1 - V 'consecutive_link_templates'))^0
        * C(Cmt(V 'consecutive_link_templates', count_up)))^1,
    -- (V 'comment' + C(V 'consecutive_link_templates') + 1)^1,

    -- A starting template followed by one or more {{m}}/{{mention}}
    -- templates. The match-time check compares the 'language_code' group
    -- that was current before the follow-up template (a) with the one the
    -- follow-up template itself produced (b) and rejects the follow-up
    -- unless they are equal.
    consecutive_link_templates = V 'start_link_template'
        * Cmt(link_separator * Cb 'language_code' * V 'link_template' * Cb 'language_code',
            function (_, _, a, b)
                return a == b
            end)^1,

    -- The first template of a run. Three families are accepted; each
    -- stores its language in the 'language_code' group:
    --   * m / mention: first parameter, used verbatim;
    --   * cog / cognate / noncognate / ncog: first parameter, passed
    --     through get_non_etymological;
    --   * der / derived / bor / borrowed / inh / inherited: the first
    --     parameter is skipped and the second one is passed through
    --     get_non_etymological.
    start_link_template = P '{{' * ws^0
        * ((P 'm' * P 'ention'^-1) * ws^0
                * P '|' * ws^0 * Cg((R 'az' + P '-')^2, 'language_code') * ws^0
            + (P 'cog' * P 'nate'^-1
                    + P 'n' * (P 'oncognate' + P 'cog')) * ws^0
                * P '|' * ws^0 * Cg((R 'az' + P '-')^2 / get_non_etymological, 'language_code') * ws^0
            + (P 'der' * P 'ived'^-1
                    + P 'bor' * P 'rowed'^-1
                    + P 'inh' * P 'erited'^-1) * ws^0
                * P '|' * ws^0 * (R 'az' + P '-')^2 * ws^0
                * P '|' * ws^0 * Cg((R 'az' + P '-')^2 / get_non_etymological, 'language_code') * ws^0)
        * V 'template_arguments' * P '}}',

    -- A follow-up template: only m / mention, language code used verbatim.
    link_template = P '{{' * ws^0 * (P 'm' * P 'ention'^-1) * ws^0
        * P '|' * ws^0 * Cg((R 'az' + P '-')^2, 'language_code') * ws^0
        * V 'template_arguments' * P '}}',

    -- Remaining pipe-separated parameters of a link template. Each
    -- parameter is any sequence of plain-text runs and nested templates and
    -- may be empty, so forms such as {{m|en|foo||gloss}} and parameters
    -- that mix text with a nested template are recognised.
    template_arguments =
        (P '|' * ((1 - (P '|' + P '{{' + P '}}'))^1 + V 'template')^0)^0,

    -- A non-empty, arbitrarily nested {{...}} template.
    template = P '{{' * (((1 - (P '{{' + P '}}'))^1 + V 'template'))^1 * P '}}',

    -- An HTML comment; the start rule skips it as a whole, so templates
    -- inside comments are not reported.
    comment = P '<!--' * (1 - P '-->')^0 * P '-->',
}
-- print(find_consecutive_link_templates:match 'I am going to {{m|en|test}} {{m|en|it}}.')
-- unique_matches '\1.-\2.-%f[\1\0]':concat '':gsub('\1(.-)\n(\2.-\n)%f[\1\0]', function (title, templates) return '\n; [[' .. title .. ']]\n' .. templates:gsub('\2(.-)\n%f[\2\1\0]', '<pre><nowiki>%1</nowiki></pre>\n') end)
-- unique_matches '\1.-\2.-%f[\1\0]':concat '':gsub('\1(.-)\n(\2.-\n)%f[\1\0]', function (title, templates) local data = { title = title, templates = {} } for template in templates:gmatch '\2(.-)\n%f[\2\1\0]' do table.insert(data.templates, template) end print(require 'cjson'.encode(data)) end)
-- Upper bound on the run counter; with math.huge the limit is effectively
-- disabled.
local max = math.huge

--- Scan one page and print every run of consecutive link templates.
-- When at least one run is found, writes "\1" .. title .. "\n" followed by
-- one "\2" .. run .. "\n" record per run to the default output.
-- @tparam string content text to scan
-- @tparam string title page title written in the header record
-- @treturn boolean true while the global run counter is below `max`
--   (presumably a "keep going" signal for the caller -- confirm against it)
return function (content, title)
    local runs = { find_consecutive_link_templates:match(content) }
    local has_runs = type(runs[1]) == 'string'
    if has_runs then
        io.write('\1', title, '\n')
        for index = 1, #runs do
            io.write('\2', runs[index], '\n')
        end
    end
    return count < max
end