Module:User:Theknightwho/wikitext parser

This is a private module sandbox of Theknightwho, for their own experimentation. Items in this module may be added and removed at Theknightwho's discretion; do not rely on this module's stability.


local anchor_encode = mw.uri.anchorEncode
local byte = string.byte
local char = string.char
local concat = table.concat
local explode = require("Module:string utilities").explode_utf8
local format = string.format
local insert = table.insert
local load_data = mw.loadData
local lower = string.lower
local match = string.match
local pairs = pairs
local rawset = rawset
local remove = table.remove
local rep = string.rep
local require = require
local select = select
local setmetatable = setmetatable
local sub = string.sub
local tonumber = tonumber
local tostring = tostring
local type = type
local ulower = string.ulower
local umatch = mw.ustring.match
local unpack = unpack
local upper = string.upper
local uupper = string.uupper

local m_parser = require("Module:parser")
local d = load_data("Module:User:Theknightwho/wikitext parser/data")

local Parser, Node = m_parser.new()

local export = {}

------------------------------------------------------------------------------------
--
-- Helper functions
--
------------------------------------------------------------------------------------

-- Like tostring, but no character escapes are applied: tables are rendered with their __rawstring method, and anything else is handed to tostring.
local function rawstring(this)
	if type(this) == "table" then
		local raw = this:__rawstring()
		if raw then
			return raw
		end
	end
	return tostring(this)
end

------------------------------------------------------------------------------------
--
-- Nodes
--
------------------------------------------------------------------------------------

-- A Proxy is a flat, indexable view built from three parallel arrays: __chars (the items), __parents (the table each item came from) and __keys (the item's key in that parent, or false if the item must not be modified).
local Proxy = {}

-- Reads resolve to a Proxy method first, then to the item at index k.
function Proxy:__index(k)
	local method = Proxy[k]
	if method then
		return method
	end
	return self.__chars[k]
end

-- Writes are mirrored into the parent table. Throws if the item is immutable (key of false) or if there is no item at index k.
function Proxy:__newindex(k, v)
	local key = self.__keys[k]
	if key == false then
		error("Character is immutable.")
	elseif not key then
		error("Invalid key.")
	end
	self.__chars[k] = v
	self.__parents[k][key] = v
end

-- Appends one item (a), its parent table (b) and its key in that parent (c).
function Proxy:build(a, b, c)
	local chars, parents, keys = self.__chars, self.__parents, self.__keys
	insert(chars, a)
	insert(parents, b)
	insert(keys, c)
end

-- Stateless iterator step: returns the next index, the item, the proxy itself, the item's key and the item's parent, or nothing once the items run out.
function Proxy:iter(i)
	local nxt = i + 1
	if self.__chars[nxt] then
		return nxt, self[nxt], self, self.__keys[nxt], self.__parents[nxt]
	end
end

-- Creates an empty Proxy that remembers this node in its __node field.
function Node:new_proxy()
	local proxy = {
		__node = self,
		__chars = {},
		__keys = {},
		__parents = {},
	}
	return setmetatable(proxy, Proxy)
end

-- Iterates over display characters. Every string produced by the "next_display" traversal is collected into a Proxy, which is then iterated with Proxy.iter.
function Node:pairs_display()
	local proxy = self:new_proxy()
	for item, parent, key in self:__pairs("next_display") do
		local is_char = type(item) == "string"
		if is_char then
			proxy:build(item, parent, key)
		end
	end
	return Proxy.iter, proxy, 0
end

-- Iterates over raw wikitext characters. Strings and "apostrophes" nodes produced by the "next_raw" traversal are collected into a Proxy; items the traversal does not flag as mutable are stored with a key of false.
function Node:pairs_raw()
	local proxy = self:new_proxy()
	for item, parent, key, mutable in self:__pairs("next_raw") do
		local keep = type(item) == "string" or item.type == "apostrophes"
		if keep then
			proxy:build(item, parent, mutable and key or false)
		end
	end
	return Proxy.iter, proxy, 0
end

do
	-- Turns one UTF-8 encoded character (1 to 4 bytes) into a decimal numeric character reference such as "&#233;". The codepoint is rebuilt by stripping the marker bits from the lead byte and each continuation byte. Input of any other length is embedded as-is.
	local function escape(this)
		local len, code = #this, this
		if len == 1 then
			code = byte(this)
		elseif len == 2 then
			local lead, c1 = byte(this, 1, 2)
			code = (lead - 0xC0) * 0x40 + (c1 - 0x80)
		elseif len == 3 then
			local lead, c1, c2 = byte(this, 1, 3)
			code = (lead - 0xE0) * 0x1000 + (c1 - 0x80) * 0x40 + (c2 - 0x80)
		elseif len == 4 then
			local lead, c1, c2, c3 = byte(this, 1, 4)
			code = (lead - 0xF0) * 0x40000 + (c1 - 0x80) * 0x1000 + (c2 - 0x80) * 0x40 + (c3 - 0x80)
		end
		return "&#" .. code .. ";"
	end
	
	-- True if `this` is a line boundary: a missing value (nil or false), "\n" or "\r".
	local function is_newline(this)
		if not this then
			return true
		end
		return this == "\n" or this == "\r"
	end
	
	-- Decides whether the item at index i of proxy must be escaped when stringifying. Items without a key (immutable ones) are returned untouched. Otherwise the item is escaped if any of the following holds:
	-- (1) it is listed in d.NOWIKI, or it is a single ASCII letter straight after two "]" (wikilink == 2);
	-- (2) it is listed in d.NOWIKI_START and sits at the start of a line, or it is the second of two consecutive "_";
	-- (3) it is the ":" of "://", the first "-" of a line-initial "----", a ":" preceded by a word listed in d.EL_SCHEMES_UNSLASHED, or an item in d.SPACE_SEPARATOR preceded by a word listed in d.MAGIC_LINKS.
	-- `word` holds the pieces of the word immediately before this item.
	local function iterate(i, this, proxy, key, word, wikilink)
		if not key then
			return this
		end
		if d.NOWIKI[this] or (wikilink == 2 and match(this, "^%a$")) then
			return escape(this)
		end
		local prev = proxy[i - 1]
		local line_start = is_newline(prev)
		if line_start and d.NOWIKI_START[this] then
			return escape(this)
		end
		if prev == "_" and this == "_" then
			return escape(this)
		end
		local nxt, nxt2 = proxy[i + 1], proxy[i + 2]
		if this == ":" and nxt == "/" and nxt2 == "/" then
			return escape(this)
		end
		if line_start and this == "-" and nxt == "-" and nxt2 == "-" and proxy[i + 3] == "-" then
			return escape(this)
		end
		if this == ":" and d.EL_SCHEMES_UNSLASHED[concat(word)] then
			return escape(this)
		end
		if d.SPACE_SEPARATOR[this] and d.MAGIC_LINKS[concat(word)] then
			return escape(this)
		end
		return this
	end
	
	-- Builds the escaped string form of the node: each raw item is passed through iterate() and the results are concatenated. Two pieces of context are maintained for iterate(): the word immediately before the current item, and the number of consecutive "]" just seen.
	function Node:__tostring()
		local out, out_len = {}, 0
		local word, word_len = {}, 0
		local wikilink = 0
		for i, this, proxy, key in self:pairs_raw() do
			this = iterate(i, this, proxy, key, word, wikilink)
			out_len = out_len + 1
			out[out_len] = this
			if match(this, "[%w_]") then
				word_len = word_len + 1
				word[word_len] = this
			else
				-- Empty the word buffer in place so the same table can be reused.
				for j = word_len, 1, -1 do
					word[j] = nil
				end
				word_len = 0
			end
			-- Raw "]" only appears at the end of a bracketed external link or wikilink. If there are two in a row, then it must be the end of a wikilink.
			if this == "]" then
				wikilink = wikilink + 1
			else
				wikilink = 0
			end
		end
		return concat(out)
	end
	
	-- Builds the unescaped string form of the node by concatenating every raw item exactly as iterated.
	function Node:__rawstring()
		local parts = {}
		for _, this in self:pairs_raw() do
			parts[#parts + 1] = this
		end
		return concat(parts)
	end
end

local Wikitext = Node:new_class("wikitext")

do
	-- Wrap the constructor Wikitext already has, so that a string argument is first run through explode (explode_utf8 from Module:string utilities) before the node is built.
	local base_new = Wikitext.new
	function Wikitext:new(t, force_wrapper)
		local contents = t
		if type(contents) == "string" then
			contents = explode(contents)
		end
		return base_new(self, contents, force_wrapper)
	end
end

-- Iterator step over positional children: returns the child after index i, its index, and true (the mutability flag used by raw iteration).
function Wikitext:next_display(i)
	local nxt = i + 1
	return self[nxt], nxt, true
end

-- Raw and display iteration are the same for plain wikitext.
Wikitext.next_raw = Wikitext.next_display
Wikitext.__tostring = Node.__tostring

-- Iterator step that returns nothing, for nodes with no children.
function Node:next_no_op()
	return
end

-- A run of apostrophes; self.num is the number of apostrophes in the run.
local Apostrophes = Node:new_class("apostrophes")
Apostrophes.next = Node.next_no_op
Apostrophes.next_display = Node.next_no_op
Apostrophes.next_raw = Node.next_no_op

function Apostrophes:__tostring()
	local count = self.num
	return rep("'", count)
end

local ExternalLink = Node:new_class("external link")

-- Iterator step over the link's children, in the order scheme (if present), url, display. `i` is 0 at the start and the previously returned key afterwards.
function ExternalLink:next(i)
	local key
	if i == 0 then
		key = self.scheme and "scheme" or "url"
	elseif i == "scheme" then
		key = "url"
	elseif i == "url" then
		key = "display"
	end
	if key == nil then
		return nil
	end
	return self[key], key
end

-- Display iteration yields only the "display" child, and only on the first step.
-- FIXME: need to return something immutable if there's no display
function ExternalLink:next_display(i)
	if i ~= 0 then
		return
	end
	return self.display, "display"
end

-- Iterator step over the raw wikitext of an external link. `i` is the iteration state: 0 at the start, then either a number (a piece of literal syntax has just been returned) or the key of the child that has just been returned.
-- Returns the next item, the new state, and a mutability flag, which is false for the literal syntax characters "[", ":", " " and "]" and true for children.
-- The order produced is: "[" (bracketed links only), scheme and ":" (if there is a scheme), url, then " " and display (if there is a display), then "]" (bracketed links only).
function ExternalLink:next_raw(i)
	if i == 0 then
		i = 1
		if self.bracketed then
			return "[", i, false
		end
		-- Unbracketed: fall through to the i == 1 case below in the same call.
	end
	if i == 1 then
		i = self.scheme and "scheme" or "url"
	elseif i == "scheme" then
		i = 2
		return ":", i, false
	elseif i == 2 then
		i = "url"
	elseif i == "url" then
		if self.display then
			i = 3
			return " ", i, false
		elseif self.bracketed then
			-- A nil state means the next call lands in the final else branch and ends the iteration.
			return "]", nil, false
		end
		return nil
	elseif i == 3 then
		i = "display"
	elseif i == "display" and self.display then
		return "]", nil, false
	else
		return nil
	end
	-- Reached only by branches that selected a child key.
	return self[i], i, true
end

-- An HTML entity. self.char holds the decoded form, which is a node with its own __tostring and __rawstring methods.
local HTMLEntity = Node:new_class("html entity")

-- Display iteration yields only the "char" child, and only on the first step.
function HTMLEntity:next_display(i)
	if i ~= 0 then
		return
	end
	return self.char, "char"
end

-- Raw iteration walks the positional children, all flagged as immutable.
function HTMLEntity:next_raw(i)
	local nxt = i + 1
	return self[nxt], nxt, false
end

function HTMLEntity:__tostring()
	local decoded = self.char
	return decoded:__tostring()
end

function HTMLEntity:__rawstring()
	local decoded = self.char
	return decoded:__rawstring()
end

-- An HTML tag. Instances in this file are built with a `name` field and, for closing tags, ["end"] = true. Plain iteration (next) yields no children.
local HTMLTag = Node:new_class("html tag")
HTMLTag.next = Node.next_no_op
--[==[
function HTMLTag:__tostring()
	local str = "<"
	if self["end"] then
		str = str .. "/"
	end
	str = str .. tostring(self.name)
	if self.attributes then
		for i = 1, #self.attributes, 2 do
			str = str .. " " .. tostring(self.attributes[i]) .. "=\"" .. tostring(self.attributes[i + 1]) .. "\""
		end
	end
	if self.self_closing then
		str = str .. "/"
	end
	return str .. ">"
end
]==]
-- A percent-encoded sequence. It shares its iteration steps with HTMLEntity: display iteration yields the "char" field, and raw iteration walks the positional children as immutable items.
local PercentEncoding = Node:new_class("percent-encoding")
PercentEncoding.next_display = HTMLEntity.next_display
PercentEncoding.next_raw = HTMLEntity.next_raw

-- Unlike HTMLEntity, the "char" field is returned directly rather than stringified.
function PercentEncoding:__tostring()
	return self.char
end

-- The raw and escaped string forms are identical.
PercentEncoding.__rawstring = PercentEncoding.__tostring

-- A strip marker. It is iterated exactly like plain wikitext: positional children, each flagged as mutable.
local StripMarker = Node:new_class("strip marker")
StripMarker.next_display = Wikitext.next_display
StripMarker.next_raw = Wikitext.next_raw

local Wikilink = Node:new_class("wikilink")

-- Iterator step over the link's children, in the order prefix (if present), title, fragment (if present), display. `i` is 0 at the start and the previously returned key afterwards.
function Wikilink:next(i)
	local key
	if i == 0 then
		key = self.prefix and "prefix" or "title"
	elseif i == "prefix" then
		key = "title"
	elseif i == "title" then
		key = self.fragment and "fragment" or "display"
	elseif i == "fragment" then
		key = "display"
	end
	if key == nil then
		return nil
	end
	return self[key], key
end

-- Display iteration is shared with external links: only the "display" child is yielded.
Wikilink.next_display = ExternalLink.next_display

-- Iterator step over the raw wikitext of a wikilink. `i` is the iteration state: 0 at the start, then either a number (a piece of literal syntax has just been returned) or the key of the child that has just been returned.
-- Returns the next item, the new state, and a mutability flag, which is false for literal syntax characters and true for children.
-- The order produced is: "[", "[", ":", prefix (if present), title, "#" and fragment (if there is a fragment), "|", display, "]", "]". Note that the ":" after "[[" is always produced.
-- NOTE(review): when self.display is nil, the step for state 5 returns nil as its first value. Whether the caller treats that as the end of iteration (so that the closing "]]" is never produced) depends on __pairs, which is not defined in this file - confirm.
function Wikilink:next_raw(i)
	if i == 0 or i == 1 then
		i = i + 1
		return "[", i, false
	elseif i == 2 then
		i = 3
		return ":", i, false
	elseif i == 3 then
		i = self.prefix and "prefix" or "title"
	elseif i == "prefix" then
		i = "title"
	elseif i == "title" then
		-- State 4 leads to the fragment, state 5 straight to the display.
		i = self.fragment and 4 or 5
		return self.fragment and "#" or "|", i, false
	elseif i == 4 then
		i = "fragment"
	elseif i == "fragment" then
		i = 5
		return "|", i, false
	elseif i == 5 then
		i = "display"
	elseif i == "display" then
		i = 6
		return "]", i, false
	elseif i == 6 then
		-- A nil state means the next call lands in the final else branch and ends the iteration.
		i = nil
		return "]", i, false
	else
		return nil
	end
	-- Reached only by branches that selected a child key.
	return self[i], i, true
end

local Prefix = Node:new_class("prefix")

-- Iterator step over the raw wikitext of a prefix: the positional children alternate with immutable ":" separators, so odd states map to child (state + 1) / 2 and even states produce ":".
function Prefix:next_raw(i)
	local pos = i + 1
	if pos % 2 == 0 then
		return ":", pos, false
	end
	return self[(pos + 1) / 2], pos, true
end
--[==[
function Prefix:__tostring()
	local output = {}
	for i = 1, #self do
		insert(output, tostring(self[i]))
	end
	return concat(output, ":") .. ":"
end
]==]
-- Node class created with the name "category". No methods are currently defined for it in this part of the file.
local Category = Node:new_class("category")
--[==[
function Category:__tostring()
	local sortkey = self.sortkey and tostring(self.sortkey) or nil
	return "[[Category:" .. tostring(self.title) .. (sortkey and "|" .. sortkey or "") .. "]]"
end
]==]
-- Node class created with the name "multipart". No methods are currently defined for it in this part of the file.
local Multipart = Node:new_class("multipart")
--[==[
function Multipart:__tostring()
	local output = {}
	for i = 1, #self do
		local v = self[i]
		insert(output, type(v) == "table" and v:__tostring() or tostring(v))
	end
	return concat(output, "‎<span class=\"Zsym mention\" style=\"font-size:100%;\">/</span>")
end
]==]

------------------------------------------------------------------------------------
--
-- Parser
--
------------------------------------------------------------------------------------

-- Pushes a new layer onto the parser's stack and makes it the current layer (self.n). The layer records the current head and the given route, starts with a length of 0, and uses Wikitext as its metatable.
function Parser:push(route)
	local layer = setmetatable({head = self.head, route = route, len = 0}, Wikitext)
	local depth = self.len + 1
	self[depth] = layer
	self.n = layer
	self.len = depth
end

-- Pushes a sublayer on top of the current layer and makes it the current layer (self.n). The current layer is turned into the sublayer's metatable: its __index and __newindex point at itself, so the sublayer reads missing fields from it and writes new fields to it, and the metamethods it would otherwise only inherit are copied onto it as raw fields.
function Parser:push_sublayer(handler)
	local parent = self.n
	rawset(parent, "__concat", parent.__concat)
	rawset(parent, "__index", parent)
	rawset(parent, "__newindex", parent)
	rawset(parent, "__pairs", parent.__pairs)
	rawset(parent, "__tostring", parent.__tostring)
	local sublayer = setmetatable({handler = handler, sublayer = true, len = 0}, parent)
	local depth = self.len + 1
	self[depth] = sublayer
	self.n = sublayer
	self.len = depth
end

------------------------------------------------------------------------------------
--
-- Italics and bold
--
------------------------------------------------------------------------------------

-- A direct copy of doQuotes in Parser.php:
-- (1) '''''...''''' is treated as <i><b>...</b></i>, but an open ''''' is implicitly closed as <b><i>...</i></b>.
-- (2) A lone '' or ''' at the end of a line is treated as <i></i> or <b></b> respectively, but a lone ''''' is completely ignored.

-- adjust_style_apostrophes is run if the number of StyleApostrophes2 and StyleApostrophes3 on the line are both odd, which converts one of the StyleApostrophes3 into an apostrophe followed by StyleApostrophes2. Parsoid uses the following priorities, and picks the first occurrence of the highest priority found:
-- (1) After a single ASCII character after a normal space (" X'''").
-- (2) After multiple non-space characters ("XXX'''") or a non-ASCII character ("É'''").
-- (3) After a space (" '''").
-- Otherwise, no adjustment.
-- If the new apostrophe is added straight after a free external link then the new apostrophe becomes part of the link, so the preceding FreeExternalLinkClose must be moved ahead of it. Note that this will cause any trailing punctuation between the old end and the apostrophe to be included as part of the link as well, since the new apostrophe means it is no longer trailing: for example, ''https://example.com/!''' goes from being https://example.com/ followed by "!" to https://example.com/!' if adjusted (but remember that only one adjustment is made, even if the adjusted sequence is repeated).
-- Counts the apostrophe runs in `layer` and, if the italic and bold counts are both odd, converts one run of three into a literal apostrophe followed by a run of two. Modifies the layer (and, for free external links, the link's url) in place; returns nothing.
function Parser:handle_odd_number_italics_and_bold(layer)
	local italics, bold = 0, 0
	for token in pairs(layer) do
		if token.type == "apostrophes" then
			-- Runs of 3 and 5 count towards bold; runs of 2 and 5 count towards italics.
			if token.num ~= 2 then
				bold = bold + 1
			end
			if token.num ~= 3 then
				italics = italics + 1
			end
		end
	end
	if italics % 2 == 0 or bold % 2 == 0 then
		return
	end
	-- First candidate of each lower-priority kind, remembered in case no top-priority candidate is found.
	local word_token, word_parent, word_key,
		space_token, space_parent, space_key
	for i, token, proxy, key, parent in layer:pairs_raw() do
		if token.type == "apostrophes" and token.num == 3 then
			if proxy[i - 1] == " " then
				-- Priority 3: straight after a space.
				space_token = space_token or token
				space_parent = space_parent or parent
				space_key = space_key or key
			elseif proxy[i - 1] and proxy[i - 2] == " " then
				-- Priority 1: after a single character which itself follows a space. Adjust immediately and stop.
				token.num = 2
				insert(parent, key, "'")
				parent.len = parent.len + 1
				return
			else
				-- Priority 2: anything else.
				word_token = word_token or token
				word_parent = word_parent or parent
				word_key = word_key or key
			end
		end
	end
	if word_token then
		word_token.num = 2
		insert(word_parent, word_key, "'")
		word_parent.len = word_parent.len + 1
		-- Walk backwards from the new apostrophe over any trailing-punctuation characters, to see whether an unbracketed external link sits just before them.
		local i, prev = 0
		repeat
			i = i + 1
			prev = layer[word_key - i]
		until not (
			type(prev) == "string" and
			match(prev, "^[!%),%.:;%?\\]$")
		)
		if not (
			type(prev) == "table" and
			prev.type == "external link" and
			not prev.bracketed
		) then
			return
		end
		-- i is now the number of punctuation characters between the link and the new apostrophe.
		i = i - 1
		local token
		-- Move those characters, plus the new apostrophe, from the layer onto the end of the link's url.
		for _ = 1, i + 1 do
			token = remove(layer, word_key - i)
			layer.len = layer.len - 1
			insert(prev.url, token)
			prev.url.len = prev.url.len + 1
		end
	elseif space_token then
		space_token.num = 2
		insert(space_parent, space_key, "'")
		space_parent.len = space_parent.len + 1
	end
	return
end

-- Replaces the apostrophe runs in `layer` with <i>/<b> HTMLTag nodes, closing any tags still open at the end of the line. Modifies the parent tables in place and emits closing tags via self:emit; returns nothing.
-- `state` tracks which tags are open: nil or "" (none), "i", "b", "ib" (<i> opened first, then <b>), "bi" (<b> opened first, then <i>), or "both" (a run of five has been seen but the order of its two tags has not been decided yet; its position is kept in both_parent/both_key).
-- final_parent/final_key are only set when the very last token of the layer is an apostrophe run.
function Parser:substitute_apostrophes(layer)
	local state, both_parent, both_key, final_parent, final_key
	for token, parent, key in pairs(layer) do
		final_parent = false
		final_key = false
		if token.type == "apostrophes" then
			if token.num == 5 then
				-- A run of five closes whatever is open and/or opens whatever is not.
				if state == "b" then
					parent[key] = HTMLTag:new{
						name = "b",
						["end"] = true
					}
					insert(parent, key + 1, HTMLTag:new{
						name = "i",
					})
					parent.len = parent.len + 1
					key = key + 1
					state = "i"
				elseif state == "i" then
					parent[key] = HTMLTag:new{
						name = "i",
						["end"] = true
					}
					insert(parent, key + 1, HTMLTag:new{
						name = "b",
					})
					parent.len = parent.len + 1
					key = key + 1
					state = "b"
				elseif state == "bi" then
					parent[key] = HTMLTag:new{
						name = "i",
						["end"] = true
					}
					insert(parent, key + 1, HTMLTag:new{
						name = "b",
						["end"] = true
					})
					parent.len = parent.len + 1
					key = key + 1
					state = ""
				elseif state == "ib" then
					parent[key] = HTMLTag:new{
						name = "b",
						["end"] = true
					}
					insert(parent, key + 1, HTMLTag:new{
						name = "i",
						["end"] = true
					})
					parent.len = parent.len + 1
					key = key + 1
					state = ""
				elseif state == "both" then
					-- Five closing five: the pending opener is resolved as <i><b> and this run becomes </b></i>.
					parent[key] = HTMLTag:new{
						name = "b",
						["end"] = true
					}
					insert(parent, key + 1, HTMLTag:new{
						name = "i",
						["end"] = true
					})
					parent.len = parent.len + 1
					key = key + 1
					both_parent[both_key] = HTMLTag:new{
						name = "i",
					}
					insert(both_parent, both_key + 1, HTMLTag:new{
						name = "b",
					})
					both_parent.len = both_parent.len + 1
					-- The opener gained a tag in the same table, so this run's position shifted by one.
					if both_parent == parent then
						key = key + 1
					end
					both_parent = nil
					both_key = nil
					state = ""
				else
					-- Nothing open: defer the decision until the first closing run is seen.
					both_parent = parent
					both_key = key
					state = "both"
				end
			else
				-- Runs of two are italics; anything else reaching here is treated as bold.
				local this = token.num == 2 and "i" or "b"
				local other = this == "i" and "b" or "i"
				if state == this then
					parent[key] = HTMLTag:new{
						name = this,
						["end"] = true
					}
					state = ""
				elseif state == other .. this then
					parent[key] = HTMLTag:new{
						name = this,
						["end"] = true
					}
					state = other
				elseif state == this .. other then
					-- `this` was opened first, so `other` must be closed and reopened around the closing tag.
					parent[key] = HTMLTag:new{
						name = other,
						["end"] = true
					}
					insert(parent, key + 1, HTMLTag:new{
						name = this,
						["end"] = true
					})
					insert(parent, key + 2, HTMLTag:new{
						name = other,
					})
					parent.len = parent.len + 2
					key = key + 2
					state = other
				elseif state == "both" then
					-- The pending opener is resolved so that `this` is the inner tag, and is closed first.
					parent[key] = HTMLTag:new{
						name = this,
						["end"] = true
					}
					both_parent[both_key] = HTMLTag:new{
						name = other,
					}
					insert(both_parent, both_key + 1, HTMLTag:new{
						name = this,
					})
					both_parent.len = both_parent.len + 1
					if both_parent == parent then
						key = key + 1
					end
					both_parent = nil
					both_key = nil
					state = other
				else
					parent[key] = HTMLTag:new{
						name = this,
					}
					state = state == other and other .. this or this
				end
			end
			final_parent = parent
			final_key = key
		end
	end
	-- No open tags at the end of a line.
	if final_parent and (
		final_parent[final_key].type == "apostrophes" or
		not final_parent[final_key]["end"]
	) then
		-- The line ends in an opening tag (or an undecided run of five): drop it.
		final_parent[final_key] = nil
		final_parent.len = final_parent.len - 1
		if state == "i" or state == "b" or state == "both" then
			return
		end
		-- Two tags were open; only the one opened first remains.
		state = sub(state, 1, 1)
	end
	-- Close whatever is still open, innermost first.
	if state == "b" or state == "ib" then
		self:emit(HTMLTag:new{
			name = "b",
			["end"] = true
		})
	end
	if state == "i" or state == "bi" or state == "ib" then
		self:emit(HTMLTag:new{
			name = "i",
			["end"] = true
		})
	end
	if state == "bi" then
		self:emit(HTMLTag:new{
			name = "b",
			["end"] = true
		})
	elseif state == "both" then
		-- An unclosed run of five is resolved as <b><i> and closed with </i></b>.
		self:emit(HTMLTag:new{
			name = "i",
			["end"] = true
		})
		self:emit(HTMLTag:new{
			name = "b",
			["end"] = true
		})
		both_parent[both_key] = HTMLTag:new{
			name = "b",
		}
		insert(both_parent, both_key + 1, HTMLTag:new{
			name = "i",
		})
		both_parent.len = both_parent.len + 1
	end
	return
end

-- Finishes processing of the current layer (self.n) at the end of a line.
-- If the layer has its `apos` flag set, apostrophe runs are first adjusted and converted into <i>/<b> tags. Then every token is visited and:
-- (1) HTML entities are replaced by their decoded form (the bare character if the decoded form has length 1, otherwise the decoded node);
-- (2) percent-encodings are replaced by their decoded "char" field;
-- (3) external links with an empty display have the display removed.
function Parser:finalize_line()
	if self.n.apos then
		self:handle_odd_number_italics_and_bold(self.n)
		self:substitute_apostrophes(self.n)
	end
	-- Conversions that need to be done after apostrophes have been processed.
	for token, parent, key in pairs(self.n) do
		local token_type = token.type
		if token_type == "html entity" then
			local decoded = token.char
			parent[key] = #decoded == 1 and decoded[1] or decoded
		elseif token_type == "percent-encoding" then
			-- Must be read from the token: a local declared in the branch above is not in scope here, so a bare `char` would resolve to the file-level alias of string.char.
			parent[key] = token.char
		elseif (
			token_type == "external link" and
			token.display and
			#token.display == 0
		) then
			token.display = nil
		end
	end
end

------------------------------------------------------------------------------------
--
-- Apostrophes
--
------------------------------------------------------------------------------------

do
	-- Handler for a run of apostrophes. Each further apostrophe has its head position recorded in the layer's `apos` list. At the first other character, the run length (recorded apostrophes plus one for the apostrophe that started the run) decides the result:
	-- 1 fails the route; 2, 3 and 5 are stored as `num`; 4 emits one literal apostrophe and stores 3; anything longer emits the excess over 5 as literal apostrophes and stores 5.
	local function handle_apostrophes(self, this)
		if this == "'" then
			local seen = self.n.apos
			if not seen then
				seen = {}
				self.n.apos = seen
			end
			insert(seen, self.head)
			return
		end
		local apos = self.n.apos and #self.n.apos + 1 or 1
		if apos == 1 then
			return self:fail_route()
		end
		if apos == 4 then
			self:emit("'")
			self.n.num = 3
		elseif apos > 5 then
			for _ = 1, apos - 5 do
				self:emit("'")
			end
			self.n.num = 5
		else
			self.n.num = apos
		end
		return self:pop()
	end
	
	-- Route entry point for an apostrophe run: installs handle_apostrophes as the handler and advances the head by one. It is looked up by name from Parser:apostrophes via self:get.
	function Parser:do_apostrophes()
		self:set("handler", handle_apostrophes)
		self:advance()
	end
	
	-- Tries to parse an apostrophe run at the current position. Returns nil if the route failed. Otherwise emits the tokens of the resulting layer (any excess literal apostrophes), steps the head back by one and returns an Apostrophes node built from the layer.
	function Parser:apostrophes()
		local result = self:get("do_apostrophes")
		if result == self.n.bad_route then
			return nil
		end
		self:emit_tokens(result)
		self:advance(-1)
		return Apostrophes:new(result)
	end
end

------------------------------------------------------------------------------------
--
-- Carriage return
--
------------------------------------------------------------------------------------

-- "\r" and "\r\n" are both treated as "\n".

-- Two-step handler for "\r". On the first call it installs itself as the layer's override, so that it is called again. On the second call it clears the override, steps back by one unless the character it was given is "\n" (i.e. the "\r" was not part of "\r\n"), and consumes a "\n" instead.
function Parser:carriage_return(this)
	local layer = self.n
	if layer.override ~= self.carriage_return then
		layer.override = self.carriage_return
		return
	end
	layer.override = nil
	if this ~= "\n" then
		self:advance(-1)
	end
	return self:consume("\n")
end

------------------------------------------------------------------------------------
--
-- Comment
--
------------------------------------------------------------------------------------

do
	-- Handlers.
	local handle_start
	local traverse_comment
	local handle_end
	
	-- Matches the rest of the comment opener "<!--" one character at a time. The layer's counter `i` is incremented before each comparison. A mismatch fails the route; once the fourth character has matched, the head is advanced and the comment body is scanned.
	function handle_start(self, this)
		local pos = self.n.i + 1
		self.n.i = pos
		if this ~= sub("<!--", pos, pos) then
			return self:fail_route()
		end
		if pos == 4 then
			self:advance()
			return traverse_comment(self)
		end
	end
	
	-- Skips through the body of a comment. On a "-", the counter is reset to 1, handle_end is installed to look for the rest of "-->", and normal traversal resumes from the next character. On the empty string returned by self:read(), the layer is popped (the comment is left unterminated).
	function traverse_comment(self)
		while true do
			local this = self:read()
			if this == "-" then
				self.n.i = 1
				self.n.handler = handle_end
				self:advance()
				return self:traverse()
			end
			if this == "" then
				return self:pop()
			end
			self:advance()
		end
	end
	
	-- Matches the rest of the comment terminator "-->" one character at a time, after traverse_comment has seen the first "-". The layer's counter `i` is the number of terminator characters matched so far.
	-- On a full match the layer is popped. On a mismatch, scanning of the comment body resumes after the offending character, with one exception: a "-" following "--" still leaves "--" as the most recent two characters, so matching continues from there. Without that exception, a comment ending in "--->" would never be closed.
	function handle_end(self, this)
		local i = self.n.i + 1
		self.n.i = i
		if this == sub("-->", i, i) then
			if i == 3 then
				return self:pop()
			end
			return
		elseif this == "-" then
			-- Only reachable at i == 3, since "-" is the expected character at i == 2.
			self.n.i = 2
			return
		end
		self:advance()
		return traverse_comment(self)
	end
	
	-- Route entry point for a comment: installs handle_start, sets the layer's no_magic_word flag, and advances the head by one. The counter starts at 1 and handle_start increments it before comparing, so the first character checked is the "!" of "<!--".
	function Parser:do_comment()
		self:set("handler", handle_start)
		self.n.no_magic_word = true
		self.n.i = 1
		self:advance()
	end
	
	-- Tries to parse a comment at the current position. A successfully parsed comment is discarded: nothing is emitted and nothing is returned. If the route failed, the current character is passed to self:consume() instead.
	function Parser:comment()
		local comment = self:get("do_comment")
		if comment == self.n.bad_route then
			return self:consume()
		end
	end
end

------------------------------------------------------------------------------------
--
-- External link
--
------------------------------------------------------------------------------------

-- Note: the Parsoid implementation of URLs is pretty crude, and doesn't respect the URL spec at https://url.spec.whatwg.org/ in many cases.

do
	-- True if `this` cannot appear in an external link: the empty string, U+FFFD Replacement Character, or a C0 control character other than tab.
	local function is_invalid(this)
		if this == "" then
			return true
		elseif this == "\239\191\189" then -- U+FFFD Replacement Character
			return true
		elseif this == "\t" then
			return false
		end
		return byte(this) <= 0x1F -- C0 control characters
	end
	
	-- Handlers.
	local handle_bracketed_start
	local handle_double_bracketed_start
	local handle_bracketed_scheme
	local handle_free_scheme
	local handle_slashes
	local handle_after_scheme
	local handle_ip
	local handle_decoded_ip
	local handle_uri
	local handle_free_uri_trail
	local handle_bracketed_uri_whitespace
	local handle_uri_end
	local handle_bracketed_text
	
	-- If another "[" is found, record the position after it as wikilink_on_fail, which will be used as the head of a wikilink if this route fails.
	-- First handler after the opening "[" of a bracketed external link. A second "[" hands over to handle_double_bracketed_start. A "/" starts a link with no scheme, so the slash counter is reset and handle_slashes takes over. Anything else starts a scheme, which is collected in its own sublayer. In the last two cases the current character is re-consumed by the new handler.
	function handle_bracketed_start(self, this)
		if this == "[" then
			self.n.handler = handle_double_bracketed_start
			return
		elseif this == "/" then
			local layer = self.n
			layer.handler = handle_slashes
			layer.i = 0
			return self:consume()
		end
		self:push_sublayer(handle_bracketed_scheme)
		return self:consume()
	end
	
	-- Handler for the character after "[[". The current head is stored as wikilink_on_fail, which Parser:bracketed_external_link passes to self:wikilink if this route fails. A third "[" fails the route outright; anything else is re-consumed by handle_bracketed_start.
	function handle_double_bracketed_start(self, this)
		local layer = self.n
		layer.wikilink_on_fail = self.head
		if this ~= "[" then
			layer.handler = handle_bracketed_start
			return self:consume()
		end
		return self:fail_route()
	end
	
	-- Collects the scheme of a bracketed external link. Letters, digits, "+", "-" and "." are emitted into the scheme sublayer; any other character except ":" fails the route. On ":", the sublayer is popped and its lowercased contents are looked up: schemes in d.EL_SCHEMES_SLASHED continue with handle_slashes, schemes in d.EL_SCHEMES_UNSLASHED continue with handle_after_scheme, and unknown schemes fail the route. The scheme is stored on the layer as a Wikitext node.
	function handle_bracketed_scheme(self, this)
		if this ~= ":" then
			if match(this, "^[%w%+%-%.]$") then
				self:emit(this)
				return
			end
			return self:fail_route()
		end
		local scheme = self:pop_sublayer()
		local normalized = lower(concat(scheme))
		if d.EL_SCHEMES_SLASHED[normalized] then
			self.n.handler = handle_slashes
			self.n.i = 0
		elseif d.EL_SCHEMES_UNSLASHED[normalized] then
			self.n.handler = handle_after_scheme
		else
			return self:fail_route()
		end
		self.n.scheme = Wikitext:new(scheme)
	end
	
	-- First handler of a free (unbracketed) external link, which is only recognised once its ":" is reached. It looks backwards through the already-emitted tokens to find the scheme, then pushes a sublayer for the rest of the link or fails the route.
	function handle_free_scheme(self)
		local i, this, nxt = 0, ":"
		-- Walk backwards while the emitted tokens are scheme characters. Afterwards, `this` is the first token that is not part of the scheme and `nxt` is the token straight after it (the ":" placeholder if no scheme characters were found).
		repeat
			i = i - 1
			this, nxt = self:emitted(i), this
		until type(this) ~= "string" or not match(this, "^[%w%+%-%.]$")
		if (
			match(nxt, "^%a$") and -- Schemes must start with a letter.
			not (type(this) == "string" and umatch(this, "^%w$")) -- The scheme must not be preceded by a word character (umatch is the Unicode-aware matcher).
		) then
			local scheme = self:concat(-1, i + 1)
			local normalized_scheme = lower(scheme)
			-- Stored so that Parser:free_external_link can remove the already-emitted scheme.
			self.n.scheme_pos = i + 1
			-- Characters treated as trailing punctuation by handle_uri.
			self.n.pattern = "^[!%),%.:;%?\\]$"
			if d.EL_SCHEMES_SLASHED[normalized_scheme] then
				self:push_sublayer(handle_slashes)
				self.n.i = 0
			elseif d.EL_SCHEMES_UNSLASHED[normalized_scheme] then
				self:push_sublayer(handle_after_scheme)
			else
				return self:fail_route()
			end
			self.n.scheme = Wikitext:new(scheme)
		else
			return self:fail_route()
		end
	end
	
	-- Requires the "//" of a link: each "/" is emitted and counted in the layer's `i`, and after the second one handle_after_scheme takes over. Any other character fails the route.
	function handle_slashes(self, this)
		if this == "/" then
			self:emit(this)
			local count = self.n.i + 1
			self.n.i = count
			if count == 2 then
				self.n.handler = handle_after_scheme
			end
			return
		end
		return self:fail_route()
	end
	
	-- Handler for the first character after the scheme (and slashes, where applicable). It pushes the sublayer in which the URI is collected, and checks whether the URI starts with a bracketed IP address, written either as a raw "[" or as the percent-encoding "%5B". Anything else is re-consumed by handle_uri.
	-- Parsoid bugs:
	-- (1) Entities for "[" (e.g. &lsqb;) aren't treated as the start of an IP address.
	-- (2) Only "%5B" is converted to "[", not "%5b".
	function handle_after_scheme(self, this)
		self:push_sublayer(handle_uri)
		if this == "%" then
			-- Falls back to the literal "%" if no percent-encoding could be parsed.
			this = self:percent_encoding() or "%"
			if rawstring(this) == "[" and this.code == "%5B" then
				self:emit(this)
				self:push_sublayer(handle_decoded_ip)
				return
			end
			-- Not the start of an IP address: take the head from the parsed node (if it has one) and let handle_uri deal with the character there.
			self.head = this and this.head or self.head
		elseif this == "[" then
			self:emit("[")
			self:push_sublayer(handle_ip)
			return
		end
		return self:consume()
	end
	
	-- Collects an IP address written between raw square brackets. Hexadecimal digits, "." and ":" are emitted into the IP sublayer. On "]", an empty address fails the route; otherwise the sublayer's tokens and the "]" are emitted into the URI, the layer is flagged with `ip`, and handle_uri takes over. Any other character fails the route, since "[" is otherwise invalid in a URI.
	function handle_ip(self, this)
		if this == "]" then
			if #self.n == 0 then
				return self:fail_route()
			end
			self:emit_tokens(self:pop_sublayer())
			self:emit("]")
			self.n.ip = true
			self.n.handler = handle_uri
			return
		end
		if not match(this, "^[%x%.:]$") then
			return self:fail_route()
		end
		self:emit(this)
	end
	
	-- IP URLs starting with "%5B" must have a matching "%5D". If a non-IP character is found, "[" is converted back to "%5B".
	-- Parsoid bug: Only "%5D" is converted to "]", not "%5d".
	function handle_decoded_ip(self, this)
		if this == "%" then
			-- Falls back to the literal "%" if no percent-encoding could be parsed.
			this = self:percent_encoding() or "%"
			if (
				#self.n > 0 and
				rawstring(this) == "]" and
				this.code == "%5D"
			) then
				-- Non-empty address closed by "%5D": merge it into the URI and flag the layer as containing an IP address.
				self:emit_tokens(self:pop_sublayer())
				self:emit(this)
				self.n.ip = true
				return
			end
			self.head = this and this.head or self.head
		elseif match(this, "^[%x%.:]$") then
			self:emit(this)
			return
		end
		-- Not a valid IP address after all: merge what was collected into the URI, then find the decoded "[" and spell it out as the literal characters "%", "5", "B".
		self:emit_tokens(self:pop_sublayer())
		local i = 0
		repeat
			i = i - 1
			this = self:emitted(i)
		until rawstring(this) == "["
		self:replace(i, "%")
		self:emit(i + 1, "5")
		self:emit(i + 1, "B")
		-- Re-consume the current character with the handler that is now active.
		return self:consume()
	end
	
	-- Main handler for the body of a URI, for both bracketed and free external links. Each character is either emitted (possibly in percent-encoded form), ignored, or ends the URI via handle_uri_end.
	-- Note: Some valid wikitext characters which are invalid in URLs resolve to percent-encoding.
	-- Parsoid bugs:
		-- (1) In bracketed links, "<", ">" (and corresponding entities &lt; and &gt;) end the URI and start the text even if they come straight after the scheme, resulting in invalid targets like "https://".
		-- (2) In free links, the entities for "<", ">" and the non-breaking space are supposed to end the URI, but Parsoid doesn't account for &LT; &GT; and &NonBreakingSpace;.
	function handle_uri(self, this)
		-- If trailing punctuation is pending (see handle_free_uri_trail), this is the position at which it would have to be inserted.
		local trail_pos = self.n.trail and #self.n + 1
		if this == "&" then
			this = self:html_entity()
			if not this then
				self:emit("&")
			elseif not this.char then
				this.char = Wikitext:new("\239\191\189") -- U+FFFD Replacement Character
				self:emit(this)
			else
				local decoded = rawstring(this)
				if decoded == " " then
					self:emit("+")
				elseif self.n.bracketed and (
					this.code == "&lt;" or
					this.code == "&gt;"
				) or not self.n.bracketed and (
					decoded == "<" and this.code ~= "&LT;" or
					decoded == ">" and this.code ~= "&GT;" or
					decoded == "\194\160" and this.code ~= "&NonBreakingSpace;"
				) then
					-- The entity ends the URI: take the head from the entity node before finishing.
					self.head = this.head
					return handle_uri_end(self, trail_pos)
				elseif match(decoded, "^[\t\n\"<>%[%]|]$") then
					-- Emit the decoded character as a three-character percent-encoding.
					this = format("%02X", byte(decoded))
					self:emit("%")
					self:emit(sub(this, 1, 1))
					self:emit(sub(this, 2, 2))
				else
					self:emit(this)
				end
			end
		elseif this == "'" then
			this = self:apostrophes()
			if this then
				-- A run of apostrophes ends the URI.
				self.n.apos = true
				self.head = this.head
				return handle_uri_end(self, trail_pos)
			end
			self:emit("'")
		elseif this == "]" then
			-- The third argument forces the layer to be popped, even for a bracketed link with no display text.
			return handle_uri_end(self, trail_pos, true)
		elseif not self.n.bracketed and this == "(" then -- Remove ")" from the trail pattern.
			self.n.pattern = "^[!,%.:;%?\\]$"
			self:emit("(")
		elseif this == "|" then
			self:emit("%")
			self:emit("7")
			self:emit("C")
		elseif this == "\127" then
			this = self:strip_marker()
			if this then
				self.head = this.head
				return handle_uri_end(self, trail_pos)
			end
			self:emit("?")
		elseif not self.n.bracketed and match(this, self.n.pattern) then
			-- Possible trailing punctuation of a free link: collect it in its own sublayer.
			self:push_sublayer(handle_free_uri_trail)
			self.n.trail_head = self.head
			return self:consume()
		elseif d.SPACE_SEPARATOR[this] then
			if not self.n.bracketed then
				return handle_uri_end(self, trail_pos)
			end
			self.n.handler = handle_bracketed_uri_whitespace
		elseif match(this, "^[\"<>%[]$") then
			return handle_uri_end(self, trail_pos)
		elseif is_invalid(this) then
			if self.n.bracketed then
				return self:fail_route()
			end
			return handle_uri_end(self, trail_pos)
		elseif #this > 1 and (
			d.IGNORED_IN_URI[this] or
			match(this, "^\243\160[\128-\191][\128-\191]$") -- U+E0000–E0FFF
		) then
			-- Ignored characters are dropped without being emitted.
			return
		else
			self:emit(this)
		end
		-- Something was emitted after the pending trail, so the trail is part of the URI after all: insert it at its saved position.
		if self.n.trail and #self.n >= trail_pos then
			self:emit_tokens(trail_pos, self.n.trail)
			self.n.trail = nil
		end
	end
	
	-- Gathers a run of trail characters (those matching the layer's `pattern`) in a sublayer. At the first character that is not one, the run is saved as the layer's `trail` and that character is re-consumed. Later, the trail is added to the URI if we know that the end doesn't come straight after it; if it does, the trail is discarded and the head is set back to the start of the trail. Note: Parsoid never adds decoded entities to the trail.
	-- Parsoid bug: If "(" is given as an entity, it does not cause ")" to be excluded from the trail characters.
	function handle_free_uri_trail(self, this)
		if not match(this, self.n.pattern) then
			-- The sublayer must be popped before self.n is read, so that the trail is stored on the layer underneath it.
			local trail = self:pop_sublayer()
			self.n.trail = trail
			return self:consume()
		end
		self:emit(this)
	end
	
	-- Skips the run of space separators between the URI and the display text of a bracketed link. The first character that is not in d.SPACE_SEPARATOR ends the URI.
	function handle_bracketed_uri_whitespace(self, this)
		if d.SPACE_SEPARATOR[this] then
			return
		end
		return handle_uri_end(self)
	end
	
	-- Finishes the URI. `trail_pos` is the position at which a pending trail would be inserted (only meaningful when the layer has a `trail`), and `force_pop` forces the layer to be popped even for a bracketed link.
	-- Fail if end comes straight after the scheme (+ slashes where applicable). If wikilink_on_fail is set (e.g. [[https://]]), then reset it to nil, since Parsoid won't parse it as a wikilink either (even though "https://" is a valid title!).
	-- For free links, we also still need to determine if the trail needs to be added (e.g. there may be excess apostrophes after it).
	-- IP square brackets use percent-encoding if the URI continues after "]", even if entered as raw characters.
	function handle_uri_end(self, trail_pos, force_pop)
		if #self.n == 0 then
			self.n.wikilink_on_fail = nil
			return self:fail_route()
		elseif self.n.trail then
			if #self.n >= trail_pos then
				-- Something was emitted after the trail, so it belongs to the URI.
				self:emit_tokens(trail_pos, self.n.trail)
			else
				-- The URI ends straight after the trail: discard it and rewind the head to where it started.
				self.head = self.n.trail_head
			end
		end
		self:emit_tokens(self:pop_sublayer())
		if self.n.ip and rawstring(self:emitted()) ~= "]" then
			-- The URI continues after the IP address: walk backwards and spell out both brackets as percent-encodings, stopping at the "[".
			local i, this = 0
			repeat
				i = i - 1
				this = self:emitted(i)
				if rawstring(this) == "]" then
					self:replace(i, "%")
					self:emit(i + 1, "5")
					self:emit(i + 1, "D")
				elseif rawstring(this) == "[" then
					self:replace(i, "%")
					self:emit(i + 1, "5")
					self:emit(i + 1, "B")
					break
				end
			until not this
		end
		local url = Wikitext:new(self:pop_sublayer())
		self.n.url = url
		if not self.n.bracketed or force_pop then
			return self:pop()
		end
		-- Bracketed link with more to come: collect the display text in its own sublayer, starting with the current character.
		self:push_sublayer(handle_bracketed_text)
		return self:consume()
	end
	
	-- Collects the display text of a bracketed external link. "]" finishes the link: the text sublayer becomes the layer's `display` and the layer is popped. Entities, apostrophe runs and strip markers are parsed and emitted, falling back to the literal "&", "'" and "?" respectively if parsing fails. "<" is handed to self:html_tag(). Invalid characters fail the route, and everything else is emitted as-is.
	function handle_bracketed_text(self, this)
		if this == "]" then
			local raw_display = self:pop_sublayer()
			self.n.display = Wikitext:new(raw_display)
			return self:pop()
		elseif this == "&" then
			self:emit(self:html_entity() or "&")
		elseif this == "'" then
			local run = self:apostrophes()
			-- Flag the layer once any apostrophe run has been found.
			self.n.apos = self.n.apos or run and true or nil
			self:emit(run or "'")
		elseif this == "<" then
			self:html_tag()
		elseif this == "\127" then
			self:emit(self:strip_marker() or "?")
		elseif is_invalid(this) then
			return self:fail_route()
		else
			self:emit(this)
		end
	end
	
	-- Route setup for a bracketed external link: installs handle_bracketed_start as the handler, flags the layer as bracketed, advances the head once and opens a sublayer with no handler of its own.
	function Parser:do_bracketed_external_link()
		self:set("handler", handle_bracketed_start)
		self.n.bracketed = true
		self:advance()
		self:push_sublayer()
	end
	
	-- Attempts the bracketed external link route and emits the outcome.
	-- Failure: parse as a wikilink if the route left `wikilink_on_fail` set, otherwise emit a literal "[".
	-- Success: emit the ExternalLink, preceded by a literal "[" if `wikilink_on_fail` is set.
	function Parser:bracketed_external_link()
		local link = self:get("do_bracketed_external_link")
		local fallback = link.wikilink_on_fail
		if link ~= self.n.bad_route then
			if fallback then
				self:emit("[")
			end
			self:emit(ExternalLink:new(link))
		elseif fallback then
			self:wikilink(fallback)
		else
			self:emit("[")
		end
	end
	
	-- Route setup for a free external link: installs handle_free_scheme as the handler. The head is not advanced.
	function Parser:do_free_external_link()
		self:set("handler", handle_free_scheme)
	end
	
	-- Attempts the free external link route and emits the outcome.
	-- Failure: emit a description list separator if the layer's `dl` flag is set (clearing the flag), otherwise a literal ":".
	-- Success: remove the already-emitted scheme tokens, emit the ExternalLink and step the head back by one.
	function Parser:free_external_link()
		local link = self:get("do_free_external_link")
		if link ~= self.n.bad_route then
			-- Account for already-emitted scheme: one removal for each position from `scheme_pos` up to -1.
			for _ = link.scheme_pos, -1 do
				self:remove()
			end
			self:emit(ExternalLink:new(link))
			self:advance(-1)
		elseif self.n.dl then
			self:emit(self.DescriptionListSeparator)
			self.n.dl = nil
		else
			self:emit(":")
		end
	end
end

------------------------------------------------------------------------------------
--
-- Heading
--
------------------------------------------------------------------------------------

do
	-- Handlers.
	local handle_start
	local handle_start_whitespace
	local handle_start_excess
	local handle_only_equals_signs
	local handle_body
	local handle_body_whitespace
	local handle_end
	local handle_end_whitespace
	
	-- Handler for the opening run of = signs: counts them in `eq` until something else is found.
	function handle_start(self, this)
		if this == "=" then
			self.n.eq = self.n.eq + 1
			return
		elseif this == " " or this == "\t" then
			self.n.handler = handle_start_whitespace
			return
		elseif this == "\n" or this == "" then
			-- The line contained nothing but = signs.
			return handle_only_equals_signs(self)
		end
		-- Anything else is the first character of the heading text.
		handle_start_excess(self)
		self.n.handler = handle_body
		return self:consume()
	end
	
	-- Handler for spaces/tabs directly after the opening = signs: they are skipped until the line ends (only = signs) or the heading text starts.
	function handle_start_whitespace(self, this)
		if this == " " or this == "\t" then
			return
		end
		if this == "\n" or this == "" then
			return handle_only_equals_signs(self)
		end
		handle_start_excess(self)
		self.n.handler = handle_body
		return self:consume()
	end
	
	-- Emit any excess = signs once we know it's a conventional heading. Up till now, we couldn't know if the heading is just a string of = signs (e.g. ========), so it wasn't guaranteed that the heading text starts after the 6th.
	-- Afterwards `eq` is capped at 6.
	function handle_start_excess(self)
		local excess = self.n.eq - 6
		if excess <= 0 then
			return
		end
		for _ = 1, excess do
			self:emit("=")
		end
		self.n.eq = 6
	end
	
	-- ===== is "=" as an L2; ======== is "==" as an L3 etc.
	-- Fewer than three = signs can't form a heading, so the route fails.
	function handle_only_equals_signs(self)
		local total = self.n.eq
		if total < 3 then
			return self:fail_route()
		end
		-- Calculate which equals signs determine the heading level: the largest even number below the total, capped at 12 (i.e. 6 on each side).
		local used = total - 1
		used = used - used % 2
		if used > 12 then
			used = 12
		end
		-- Emit the excess, which becomes the heading text.
		for _ = used + 1, total do
			self:emit("=")
		end
		self.n.level = used / 2
		return self:pop()
	end
	
	-- Handler for the heading text. A "=" triggers an attempt to close the heading (do_heading_end returns the number of closing = signs); a newline or the end of input before that fails the route. Other special characters are delegated to their own parsers.
	function handle_body(self, this)
		if this == "=" then
			local closing = self:get("do_heading_end")
			if closing == self.n.bad_route then -- = signs are just part of the heading.
				self:advance(#self.n.bad_route)
				self:emit_tokens(self.n.bad_route)
				return self:consume()
			end
			local opening = self.n.eq
			if closing > opening then
				-- More = signs at the end than at the start: the surplus is appended to the heading text.
				for _ = opening + 1, closing do
					self:emit("=")
				end
				self.n.level = opening
				return self:pop()
			end
			-- Otherwise, any surplus = signs from the start are put at the front of the heading text.
			for _ = closing + 1, opening do
				self:emit(1, "=")
			end
			-- Remove already-emitted whitespace before end.
			local last = self:emitted()
			while last == " " or last == "\t" do
				self:remove()
				last = self:emitted()
			end
			self.n.level = closing
			return self:pop()
		end
		if this == "\n" or this == "" then
			return self:fail_route()
		elseif this == " " or this == "\t" then
			-- Emit one whitespace character, then let the override skip the rest of the run.
			self:emit(this)
			self.n.override = handle_body_whitespace
		elseif this == "[" then
			self:bracketed_external_link()
		elseif this == ":" then
			self:free_external_link()
		elseif this == "<" then
			self:html_tag()
		elseif this == "&" then
			self:emit(self:html_entity() or "&")
		elseif this == "'" then
			local apos = self:apostrophes()
			self.n.apos = self.n.apos or apos and true or nil
			self:emit(apos or "'")
		elseif this == "I" or this == "P" or this == "R" then
			self:magic_link(this)
		elseif this == "\127" then
			self:emit(self:strip_marker() or "?")
		else
			self:emit(this)
		end
	end
	
	-- Override used within the heading text: skips further spaces/tabs, then removes itself and lets the regular handler deal with the current character.
	function handle_body_whitespace(self, this)
		if this == " " or this == "\t" then
			return
		end
		self.n.override = nil
		return self:consume()
	end
	
	-- Handler for a candidate closing run of = signs: each one is emitted, and at the end of the line the route returns how many tokens the popped layer holds. Anything other than trailing spaces/tabs after the run fails the route.
	function handle_end(self, this)
		if this == "=" then
			self:emit("=")
			return
		elseif this == " " or this == "\t" then
			self.n.handler = handle_end_whitespace
			return
		elseif this == "\n" or this == "" then
			return #self:pop()
		end
		return self:fail_route()
	end
	
	-- Handler for spaces/tabs after the closing = signs: only the end of the line may follow, at which point the route returns how many tokens the popped layer holds.
	function handle_end_whitespace(self, this)
		if this == " " or this == "\t" then
			return
		end
		if this == "\n" or this == "" then
			return #self:pop()
		end
		return self:fail_route()
	end
	
	-- Route setup for a heading: installs handle_start, starts the = count at 1 and advances the head once.
	function Parser:do_heading()
		self:set("handler", handle_start)
		self.n.eq = 1
		self:advance()
	end
	
	-- Route setup for the closing = signs of a heading: installs handle_end. The head is not advanced, so the handler starts on the current character.
	function Parser:do_heading_end()
		self:set("handler", handle_end)
	end
	
	-- Attempts the heading route. On success, emits <hN><span class="mw-headline" id="...">, the heading tokens, then </span></hN>, where N is the heading's `level` and the id is the anchor-encoded string form of the heading. The head is stepped back by one in either case.
	function Parser:heading()
		local heading = self:get("do_heading")
		if heading == self.n.bad_route then
			self:advance(-1)
			return
		end
		local tag_name = "h" .. heading.level
		self:emit(HTMLTag:new{
			name = Wikitext:new(tag_name)
		})
		local headline_attributes = {
			Wikitext:new("class"),
			Wikitext:new("mw-headline"),
			Wikitext:new("id"),
			export.parse_nowiki(anchor_encode(tostring(heading)))
		}
		self:emit(HTMLTag:new{
			name = Wikitext:new("span"),
			attributes = headline_attributes
		})
		self:emit_tokens(heading)
		self:emit(HTMLTag:new{
			name = Wikitext:new("span"),
			["end"] = true
		})
		self:emit(HTMLTag:new{
			name = Wikitext:new(tag_name),
			["end"] = true
		})
		self:advance(-1)
	end
end

------------------------------------------------------------------------------------
--
-- Horizontal rule
--
------------------------------------------------------------------------------------


do
	-- Counts consecutive hyphens in `i`. On the first other character, the route succeeds (popping the layer and returning true) if at least four were counted, and fails otherwise.
	local function handle_horizontal_rule(self, this)
		if this == "-" then
			self.n.i = self.n.i + 1
			return
		end
		if self.n.i < 4 then
			return self:fail_route()
		end
		self:pop()
		return true
	end
	
	-- Route setup for a horizontal rule: installs handle_horizontal_rule, starts the hyphen count at 1 and advances the head once.
	function Parser:do_horizontal_rule()
		self:set("handler", handle_horizontal_rule)
		self.n.i = 1
		self:advance()
	end
	
	-- Attempts the horizontal rule route, emitting a self-closing <hr> tag if it succeeds. The head is stepped back by one in either case.
	function Parser:horizontal_rule()
		local result = self:get("do_horizontal_rule")
		local succeeded = result ~= self.n.bad_route
		if succeeded then
			local hr = HTMLTag:new{
				name = Wikitext:new("hr"),
				self_closing = true
			}
			self:emit(hr)
		end
		self:advance(-1)
	end
end

------------------------------------------------------------------------------------
--
-- HTML entity
--
------------------------------------------------------------------------------------

-- Parsoid regex: &([A-Za-z0-9\x80-\xff]+;)|&\#([0-9]+)|&\#[xX]([0-9A-Fa-f]+)|(&)

-- If the route decodes to an invalid entity (e.g. &#xD800;), then the route still succeeds, but the output is the original wikitext. This matches Parsoid, which processes such entities but makes the output the same as the input string. This means that inputs such as [[&#xD800;]] are treated as attempted links to pages with an HTML entity in the title (invalid), and not as a link to "&" with the fragment "xD800;".

-- Characters which are never valid in HTML entities. Note that non-ASCII characters are treated as valid in entity names by the Parsoid regex, since it supports some nonstandard entities that use them.

do
	-- Returns true for the empty string and for any single-byte string that isn't an ASCII letter or digit; strings longer than one byte are never invalid.
	local function is_invalid(this)
		if this == "" then
			return true
		end
		return #this == 1 and not match(this, "^%w$")
	end
	
	-- Converts a codepoint to the equivalent character. Characters which aren't decoded by Parsoid return nil.
	-- `cp` must be a non-negative integer. The codepoints which return nil are: U+0000-0008, U+000B-001F, U+007F-009F, the surrogates (U+D800-DFFF), U+FFFE, U+FFFF and anything above U+10FFFF.
	-- Each byte is floored explicitly before being passed to string.char, since its handling of fractional arguments depends on the Lua build (and is an error from Lua 5.3 onwards).
	local function utf8_char(cp)
		if (
			cp <= 0x08 or
			cp >= 0x0B and cp <= 0x1F or
			cp >= 0x7F and cp <= 0x9F or
			cp >= 0xD800 and cp <= 0xDFFF or
			cp == 0xFFFE or cp == 0xFFFF or
			cp > 0x10FFFF
		) then
			return nil
		elseif cp < 0x80 then
			return char(cp)
		end
		local floor = math.floor
		-- The lowest six bits always go into the final continuation byte.
		local last = 0x80 + cp % 0x40
		if cp < 0x800 then
			return char(
				0xC0 + floor(cp / 0x40),
				last
			)
		elseif cp < 0x10000 then
			return char(
				0xE0 + floor(cp / 0x1000),
				0x80 + floor(cp / 0x40) % 0x40,
				last
			)
		end
		return char(
			0xF0 + floor(cp / 0x40000),
			0x80 + floor(cp / 0x1000) % 0x40,
			0x80 + floor(cp / 0x40) % 0x40,
			last
		)
	end
	
	-- Handlers.
	local handle_start
	local handle_numeric
	local handle_numeric_code
	local handle_dec_code
	local handle_hex_code
	local handle_named
	local handle_percent_encoding
	
	-- Handler for the first character after "&": "#" starts a numeric entity, "%" is passed to the percent-encoding handler, any other invalid character fails the route, and anything else starts a named entity.
	function handle_start(self, this)
		if this == "%" then
			return handle_percent_encoding(self)
		elseif this ~= "#" and is_invalid(this) then
			return self:fail_route()
		end
		self:emit(this)
		self.n.handler = this == "#" and handle_numeric or handle_named
	end
	
	-- Handler for the character after "&#": "x"/"X" selects the hexadecimal handler, an ASCII digit selects the decimal handler, "%" is passed to the percent-encoding handler, and anything else fails the route.
	function handle_numeric(self, this)
		if this == "%" then
			return handle_percent_encoding(self)
		end
		local next_handler
		if this == "X" or this == "x" then
			next_handler = handle_hex_code
		elseif match(this, "^%d$") then
			next_handler = handle_dec_code
		else
			return self:fail_route()
		end
		self:emit(this)
		self.n.handler = next_handler
	end
	
	-- Shared handler for the digits of a numeric entity.
	-- `format` is the pattern that a single digit has to match, `start` is the index of the first digit among the emitted tokens, and `base` is the radix given to tonumber (nil means decimal).
	-- On ";", the digits are converted to a character:
	-- * if there are no digits to convert (e.g. "&#x;"), the route fails as an ordinary parse failure;
	-- * if the codepoint is one that doesn't get decoded, the route fails with `no_char` set;
	-- * otherwise ";" is emitted, the character is stored as `char` and the layer is popped.
	function handle_numeric_code(self, this, format, start, base)
		if this == "%" then
			return handle_percent_encoding(self)
		elseif this == ";" then
			local cp = tonumber(self:concat(start), base)
			if not cp then
				-- No digits were given, so there is nothing to decode. This is a failed parse rather than an undecodable codepoint, so `no_char` is not set.
				return self:fail_route()
			end
			local char = utf8_char(cp)
			if not char then
				self.n.no_char = true
				return self:fail_route()
			end
			self:emit(";")
			self.n.char = Wikitext:new(char)
			return self:pop()
		elseif not match(this, format) then
			return self:fail_route()
		end
		self:emit(this)
	end
	
	-- Decimal entities ("&#123;"): digits match %d and start at token 3 (after "&" and "#"); no base is given to tonumber, so it defaults to decimal.
	function handle_dec_code(self, this)
		return handle_numeric_code(self, this, "^%d$", 3)
	end
	
	-- Hexadecimal entities ("&#x7B;"): digits match %x and start at token 4 (after "&", "#" and "x"/"X"); converted with base 16.
	function handle_hex_code(self, this)
		return handle_numeric_code(self, this, "^%x$", 4, 16)
	end
	
	-- Handler for the name of a named entity. On ";", the name (everything emitted after "&") is looked up in Module:data/entities: if there is no entry, the route fails with `no_char` set; otherwise ";" is emitted, the character is stored as `char` and the layer is popped.
	function handle_named(self, this)
		if this == "%" then
			return handle_percent_encoding(self)
		end
		if this ~= ";" then
			if is_invalid(this) then
				return self:fail_route()
			end
			self:emit(this)
			return
		end
		local decoded = load_data("Module:data/entities")[self:concat(2)]
		if not decoded then
			self.n.no_char = true
			return self:fail_route()
		end
		self:emit(";")
		self.n.char = Wikitext:new(decoded)
		return self:pop()
	end
	
	-- Called when "%" is found inside an entity. Unless the layer's `decode_percent` flag is set, the route fails. Otherwise the percent-encoding is parsed and the resulting character is consumed in place of the "%"; if parsing fails, or the result is itself "%", the route fails.
	function handle_percent_encoding(self)
		if self.n.decode_percent then
			local decoded = rawstring(self:percent_encoding() or "%")
			if decoded ~= "%" then
				return self:consume(decoded)
			end
			-- A decoded "%" is rejected to avoid double-decoding.
		end
		return self:fail_route()
	end
	
	-- `decode_percent` denotes underlying contexts in which percent-decoding should be attempted, since Parsoid decodes percent-encoding then HTML entities in that order (e.g. "%26%79%65%6E%3B" → "&yen;" → "¥").
	-- Route setup for an HTML entity: installs handle_start, stores `decode_percent` on the layer, emits the leading "&" and advances the head once.
	function Parser:do_html_entity(decode_percent)
		self:set("handler", handle_start)
		self.n.decode_percent = decode_percent
		self:emit("&")
		self:advance()
	end
	
	-- Returns nil if the parse fails (e.g. "&exam ple;" or "&#123x;"), and false if no character can be decoded (e.g. "&notvalid;" is not associated with any character, "&#0;" is a codepoint that doesn't get resolved, and "&#x100000;" is a codepoint that's too high). This is because the second type will cause wikilinks to fail, whereas the first will not.
	-- On success, returns an HTMLEntity whose `code` is the concatenation of the entity's tokens.
	function Parser:html_entity(decode_percent)
		local entity = self:get("do_html_entity", decode_percent)
		if entity ~= self.n.bad_route then
			entity.code = concat(entity)
			return HTMLEntity:new(entity)
		end
		if self.n.bad_route.no_char then
			return false
		end
		return nil
	end
end

------------------------------------------------------------------------------------
--
-- HTML tag
--
------------------------------------------------------------------------------------

do
	-- HTML whitespace: space, tab, newline or form feed.
	local function is_space(this)
		return this == " " or this == "\t" or this == "\n" or this == "\f"
	end
	
	-- Handlers.
	local handle_start
	local handle_open_tag_name
	local handle_before_attribute_name
	local handle_attribute_name
	local handle_after_attribute_name
	local handle_before_attribute_value
	local handle_quoted_attribute_value
	local handle_unquoted_attribute_value
	local handle_self_closing_tag
	local handle_end_tag_start
	local handle_end_tag_name
	local handle_end_tag_remainder
	
	-- Handler for the first character after "<": "/" starts an end tag, an ASCII letter starts the name of an open tag (emitted in lowercase), and anything else fails the route.
	function handle_start(self, this)
		if this == "/" then
			self:push_sublayer(handle_end_tag_start)
			return
		end
		if not match(this, "^%a$") then
			return self:fail_route()
		end
		self:push_sublayer(handle_open_tag_name)
		self:emit(lower(this))
	end
	
	-- Handler for the name of an open tag. "/", ">" and whitespace all end the name, which is stored as `name`; ASCII uppercase letters are emitted in lowercase, and any other character is emitted as-is. The end of input fails the route.
	function handle_open_tag_name(self, this)
		if this == "" then
			return self:fail_route()
		end
		if this == "/" or this == ">" or is_space(this) then
			-- Pop before assigning, so that `self.n` is resolved after the sublayer has gone.
			local name = Wikitext:new(self:pop_sublayer())
			self.n.name = name
			if this == ">" then
				return self:pop()
			elseif this == "/" then
				-- NOTE(review): unlike the whitespace branch, no attribute sublayer is pushed here, yet handle_self_closing_tag pops one on ">" - confirm that this is safe for input like "<br/>".
				self.n.handler = handle_self_closing_tag
			else
				self:push_sublayer(handle_before_attribute_name)
			end
			return
		end
		self:emit(match(this, "^%u$") and lower(this) or this)
	end
	
	-- Handler for the gap before an attribute name. Whitespace is skipped; ">" ends the tag (storing the attribute tokens as `attributes` if there are any); "/" may be the start of "/>"; "=" is treated as the first character of an attribute name; anything else starts an attribute name and is consumed again by handle_attribute_name. The end of input fails the route.
	function handle_before_attribute_name(self, this)
		if this == "" then
			return self:fail_route()
		elseif this == ">" then
			local attributes = self:pop_sublayer()
			if #attributes > 0 then
				self.n.attributes = attributes
			end
			return self:pop()
		elseif this == "/" then
			self.n.handler = handle_self_closing_tag
		elseif this == "=" then
			self:push_sublayer(handle_attribute_name)
			self:emit("=")
		elseif not is_space(this) then
			self:push_sublayer(handle_attribute_name)
			return self:consume()
		end
	end
	
	-- Handler for an attribute name. "=" ends the name and moves on to the value; "/", ">" and whitespace end the name and are consumed again by handle_after_attribute_name. ASCII uppercase letters are emitted in lowercase. The end of input fails the route.
	function handle_attribute_name(self, this)
		if this == "" then
			return self:fail_route()
		elseif this == "=" then
			local name = Wikitext:new(self:pop_sublayer())
			self:emit(name)
			self.n.handler = handle_before_attribute_value
		elseif this == "/" or this == ">" or is_space(this) then
			local name = Wikitext:new(self:pop_sublayer())
			self:emit(name)
			self.n.handler = handle_after_attribute_name
			return self:consume()
		else
			self:emit(match(this, "^%u$") and lower(this) or this)
		end
	end
	
	-- Handler for the gap after an attribute name. "=" moves on to the value; whitespace is skipped; in every other case the attribute has no value, so an empty Wikitext is emitted in its place before moving on. The end of input fails the route.
	function handle_after_attribute_name(self, this)
		if this == "" then
			return self:fail_route()
		elseif this == "=" then
			self.n.handler = handle_before_attribute_value
			return
		elseif is_space(this) then
			return
		end
		-- Empty value for the attribute that has just ended.
		self:emit(Wikitext:new{})
		if this == "/" then
			self.n.handler = handle_self_closing_tag
		elseif this == ">" then
			local attributes = self:pop_sublayer()
			self.n.attributes = attributes
			return self:pop()
		else
			self:push_sublayer(handle_attribute_name)
			return self:consume()
		end
	end
	
	-- Handler for the gap between "=" and an attribute value. Whitespace is skipped; a quote opens a quoted value (the quote character is stored as `quoter`); ">" ends the tag with an empty value; anything else starts an unquoted value and is consumed again by handle_unquoted_attribute_value.
	function handle_before_attribute_value(self, this)
		if is_space(this) then
			return
		elseif this == ">" then
			self:emit(Wikitext:new{})
			local attributes = self:pop_sublayer()
			self.n.attributes = attributes
			return self:pop()
		elseif this == "\"" or this == "'" then
			self:push_sublayer(handle_quoted_attribute_value)
			self:set("quoter", this)
			return
		end
		self:push_sublayer(handle_unquoted_attribute_value)
		return self:consume()
	end
	
	-- Handler for a quoted attribute value. The character stored as `quoter` closes the value; HTML entities are parsed (falling back to a literal "&"). The end of input fails the route.
	function handle_quoted_attribute_value(self, this)
		if this == self.n.quoter then
			local value = Wikitext:new(self:pop_sublayer())
			self:emit(value)
			self.n.handler = handle_before_attribute_name
			return
		elseif this == "" then
			return self:fail_route()
		end
		if this == "&" then
			this = self:html_entity() or "&"
		end
		self:emit(this)
	end
	
	-- Handler for an unquoted attribute value. Whitespace ends the value; ">" ends both the value and the tag; HTML entities are parsed (falling back to a literal "&"). The end of input fails the route.
	function handle_unquoted_attribute_value(self, this)
		if this == "" then
			return self:fail_route()
		elseif this == ">" then
			local value = Wikitext:new(self:pop_sublayer())
			self:emit(value)
			local attributes = self:pop_sublayer()
			self.n.attributes = attributes
			return self:pop()
		elseif is_space(this) then
			local value = Wikitext:new(self:pop_sublayer())
			self:emit(value)
			self.n.handler = handle_before_attribute_name
		elseif this == "&" then
			self:emit(self:html_entity() or "&")
		else
			self:emit(this)
		end
	end
	
	-- Handler for the character after "/" in an open tag. ">" completes a self-closing tag: `self_closing` is set, the attribute tokens are stored as `attributes` if there are any, and the layer is popped. Otherwise the "/" is dropped and the character is consumed again by handle_before_attribute_name.
	function handle_self_closing_tag(self, this)
		if this ~= ">" then
			self.n.handler = handle_before_attribute_name
			return self:consume()
		end
		self.n.self_closing = true
		local attributes = self:pop_sublayer()
		if #attributes > 0 then
			self.n.attributes = attributes
		end
		return self:pop()
	end
	
	-- Handler for the first character after "</": it must be an ASCII letter, which is emitted in lowercase as the start of the tag name, with `end` set on the layer. Anything else fails the route.
	function handle_end_tag_start(self, this)
		if not match(this, "^%a$") then
			return self:fail_route()
		end
		self.n["end"] = true
		self:emit(lower(this))
		self.n.handler = handle_end_tag_name
	end
	
	-- Handler for the name of an end tag. "/", ">" and whitespace end the name, which is stored as `name`; ">" also ends the tag. ASCII uppercase letters are emitted in lowercase. The end of input fails the route.
	function handle_end_tag_name(self, this)
		if this == "" then
			return self:fail_route()
		end
		local closes_tag = this == ">"
		if closes_tag or this == "/" or is_space(this) then
			-- Pop before assigning, so that `self.n` is resolved after the sublayer has gone.
			local name = Wikitext:new(self:pop_sublayer())
			self.n.name = name
			if closes_tag then
				return self:pop()
			end
			self.n.handler = handle_end_tag_remainder
			return
		end
		self:emit(match(this, "^%u$") and lower(this) or this)
	end
	
	-- Handler for whatever follows the name in an end tag: everything is ignored up to ">", which pops the layer. The end of input fails the route.
	function handle_end_tag_remainder(self, this)
		if this == "" then
			return self:fail_route()
		end
		if this == ">" then
			return self:pop()
		end
	end
	
	-- Route setup for an HTML tag: installs handle_start and advances the head once.
	function Parser:do_html_tag()
		self:set("handler", handle_start)
		self:advance()
	end
	
	-- Attempts the HTML tag route, emitting an HTMLTag on success and a literal "<" on failure.
	function Parser:html_tag()
		local tag = self:get("do_html_tag")
		if tag ~= self.n.bad_route then
			self:emit(HTMLTag:new(tag))
			return
		end
		self:emit("<")
	end
end

------------------------------------------------------------------------------------
--
-- Magic link
--
------------------------------------------------------------------------------------

-- Parsoid regexes:
	-- ISBN: \bISBN$spaces((?:97[89]$spdash?)?(?:[0-9]$spdash?){9}[0-9Xx])\b
	-- PMID/RFC: \b(?:RFC|PMID)$spaces([0-9]+)\b
	-- where:
	-- $spaces is (?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})++
	-- $spdash is (?:-|\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})

do
	-- Handlers.
	local handle_prefix
	local handle_whitespace
	local handle_isbn13_number_first
	local handle_isbn_number
	local handle_isbn_spdash
	local handle_isbn_end
	local handle_other_number
	
	-- Handler for the prefix: each character has to match the next character of `prefix`, and is emitted if so. Once the whole prefix has matched, the route fails if `prev` is a string consisting of a single alphanumeric character; otherwise the whitespace handler takes over.
	function handle_prefix(self, this)
		local i = self.n.i + 1
		self.n.i = i
		local prefix = self.n.prefix
		if this ~= sub(prefix, i, i) then
			return self:fail_route()
		end
		self:emit(this)
		if i ~= #prefix then
			return
		end
		local prev = self.n.prev
		if type(prev) == "string" and umatch(prev, "^%w$") then
			return self:fail_route()
		end
		self.n.handler = handle_whitespace
	end
	
	-- Handler for the separator between the prefix and the number ($spaces in the Parsoid regexes). Any number of separator characters (those in d.SPACE_SEPARATOR, or an HTML entity for a no-break space) may occur, but at least one is required: `ws_found` records that one has been seen, and a digit which comes before that fails the route (e.g. "ISBN1234567890" or "RFC1").
	-- The whole separator is emitted as a single " ", once the first digit is found. For ISBN, the number is parsed on a route of its own; otherwise, the digit is consumed again by handle_other_number.
	function handle_whitespace(self, this)
		if this == "&" then
			this = self:html_entity()
			-- The entity has to decode to a no-break space, but "&NonBreakingSpace;" is not accepted.
			if (
				not this or
				this.code == "&NonBreakingSpace;" or
				rawstring(this) ~= "\194\160"
			) then
				return self:fail_route()
			end
			self.n.ws_found = true
		elseif d.SPACE_SEPARATOR[this] then
			self.n.ws_found = true
		elseif match(this, "^%d$") then
			if not self.n.ws_found then
				-- The prefix has to be separated from the number.
				return self:fail_route()
			end
			self:emit(" ")
			if self.n.prefix == "ISBN" then
				local number = self:get("do_isbn_number")
				if number == self.n.bad_route then
					return self:fail_route()
				end
				self:emit_tokens(number)
				return self:pop()
			end
			self.n.handler = handle_other_number
			return self:consume()
		else
			return self:fail_route()
		end
	end
	
	-- spdash is not allowed between the first three digits of an ISBN13 number.
	-- These have to be "978" or "979"; each digit is emitted, and the layer is popped after the third.
	function handle_isbn13_number_first(self, this)
		local i = self.n.i + 1
		self.n.i = i
		local ok = true
		if i == 1 then
			ok = this == "9"
		elseif i == 2 then
			ok = this == "7"
		elseif i == 3 then
			ok = this == "8" or this == "9"
		end
		if not ok then
			return self:fail_route()
		end
		self:emit(this)
		if i == 3 then
			return self:pop()
		end
	end
	
	-- Handler for the ten characters of an ISBN (or the ten after the "978"/"979" of an ISBN13). The first nine have to be ASCII digits, each of which may be followed by one spdash (handled by the override); the tenth may be a digit or "X"/"x", after which handle_isbn_end takes over.
	function handle_isbn_number(self, this)
		local i = self.n.i + 1
		self.n.i = i
		local is_last = i == 10
		if i > 10 or not match(this, is_last and "^[%dXx]$" or "^%d$") then
			return self:fail_route()
		end
		self:emit(this)
		if is_last then
			self.n.handler = handle_isbn_end
		else
			self.n.override = handle_isbn_spdash
		end
	end
	
	-- One-shot override for the optional spdash after an ISBN digit: it always removes itself first. A hyphen or a character in d.SPACE_SEPARATOR is emitted as-is; an HTML entity has to decode to a no-break space ("&NonBreakingSpace;" is not accepted), which is emitted as the raw character; anything else is consumed again by the regular handler.
	function handle_isbn_spdash(self, this)
		self.n.override = nil
		if this == "&" then
			local entity = self:html_entity()
			local is_nbsp = entity and
				entity.code ~= "&NonBreakingSpace;" and
				rawstring(entity) == "\194\160"
			if not is_nbsp then
				return self:fail_route()
			end
			self:emit("\194\160")
		elseif this == "-" or d.SPACE_SEPARATOR[this] then
			self:emit(this)
		else
			return self:consume()
		end
	end
	
	-- Handler for the character after the final character of an ISBN: the route fails if it's alphanumeric (as judged by mw.ustring), and the layer is popped otherwise.
	function handle_isbn_end(self, this)
		if umatch(this, "^%w$") then
			return self:fail_route()
		end
		return self:pop()
	end
	
	-- Handler for the number of a PMID/RFC magic link: ASCII digits are emitted; the first other character fails the route if it's alphanumeric (as judged by mw.ustring), and pops the layer otherwise.
	function handle_other_number(self, this)
		if match(this, "^%d$") then
			self:emit(this)
			return
		end
		if umatch(this, "^%w$") then
			return self:fail_route()
		end
		return self:pop()
	end
	
	-- Route setup for a magic link. `this` is the first letter of the candidate prefix, which selects the full prefix that has to follow: "I" → "ISBN", "P" → "PMID", "R" → "RFC" (any other value gives false). The last emitted token is stored as `prev`, and the prefix character counter `i` starts at 0. The head is not advanced.
	function Parser:do_magic_link(this)
		self:set("handler", handle_prefix)
		self.n.prev = self:emitted()
		local prefixes = {I = "ISBN", P = "PMID", R = "RFC"}
		self.n.prefix = prefixes[this] or false
		self.n.i = 0
	end
	
	-- Route setup for the number of an ISBN. An ISBN13 is tried first, as two nested routes: the first three digits, then the remaining ten characters. If both succeed, their tokens are emitted and the layer is popped straight away. Otherwise, the number is parsed as a plain ISBN by handle_isbn_number with the counter at 0.
	function Parser:do_isbn_number()
		self:set("handler", handle_isbn_number)
		local isbn13_first = self:get("do_isbn13_number_first")
		if isbn13_first ~= self.n.bad_route then
			local isbn13_rem = self:get("do_isbn13_number_remainder")
			if isbn13_rem ~= self.n.bad_route then
				self:emit_tokens(isbn13_first)
				self:emit_tokens(isbn13_rem)
				return self:pop()
			end
			-- The first three digits matched but the remainder didn't, so the head is set back to the `head` stored on the first route's layer.
			-- NOTE(review): presumably that is where the first route started - confirm against Module:parser.
			self.head = isbn13_first.head
		end
		self.n.i = 0
	end
	
	-- Route setup for the first three digits of an ISBN13: installs handle_isbn13_number_first with the counter at 0. The head is not advanced.
	function Parser:do_isbn13_number_first()
		self:set("handler", handle_isbn13_number_first)
		self.n.i = 0
	end
	
	-- Route setup for the remainder of an ISBN13: installs handle_isbn_number with the counter at 0, and advances the head once. The spdash override is set from the start, so that one spdash is allowed before the first remaining digit.
	function Parser:do_isbn13_number_remainder()
		self:set("handler", handle_isbn_number)
		self.n.i = 0
		self.n.override = handle_isbn_spdash
		self:advance()
	end
	
	-- Attempts the magic link route. On failure, `this` is emitted as-is and the head is left alone. On success, the head is stepped back by one after emitting:
	-- * ISBN: a wikilink to Special:BookSources/<number>, where the number consists of the digits and "X" (uppercased) found from token 6 onwards;
	-- * PMID: a bracketed, protocol-relative external link to //www.ncbi.nlm.nih.gov/pubmed/<number>?dopt=Abstract;
	-- * RFC: a bracketed https external link to //tools.ietf.org/html/rfc<number>.
	-- In all cases, the display text is the magic link's own tokens.
	function Parser:magic_link(this)
		local magic_link = self:get("do_magic_link", this)
		if magic_link == self.n.bad_route then
			self:emit(this)
			return
		end
		local kind = magic_link.prefix
		if kind == "ISBN" then
			local prefix = Prefix:new{Wikitext:new("Special")}
			local title = Wikitext:new("BookSources/")
			-- Tokens 1-5 are "ISBN" plus the space.
			for i = 6, #magic_link do
				local token = magic_link[i]
				if match(token, "^[%dXx]$") then
					insert(title, upper(token))
				end
			end
			self:emit(Wikilink:new{
				prefix = prefix,
				title = title,
				display = Wikitext:new(magic_link)
			})
		else
			local is_pmid = kind == "PMID"
			local url = explode(
				is_pmid and "//www.ncbi.nlm.nih.gov/pubmed/?dopt=Abstract" or
				"//tools.ietf.org/html/rfc"
			)
			-- `first` is the index of the first digit token; `offset` is chosen so that the first digit lands directly after "pubmed/" or "rfc".
			local first = is_pmid and 6 or 5
			local offset = is_pmid and 25 or 21
			for n = first, #magic_link do
				insert(url, n + offset, magic_link[n])
			end
			self:emit(ExternalLink:new{
				scheme = not is_pmid and Wikitext:new("https") or nil,
				url = Wikitext:new(url),
				display = Wikitext:new(magic_link),
				bracketed = true
			})
		end
		self:advance(-1)
	end
end

------------------------------------------------------------------------------------
--
-- Magic word
--
------------------------------------------------------------------------------------

do
	-- Handlers.
	local handle_start
	local handle_body
	local handle_end
	
	-- Handler for the character after the first "_": it has to be a second "_", after which the body handler takes over.
	function handle_start(self, this)
		if this == "_" then
			self.n.handler = handle_body
			return
		end
		return self:fail_route()
	end
	
	-- Handler for the name of a magic word: ASCII letters are emitted, "_" may be the start of the closing "__" (so it isn't emitted yet), and anything else fails the route.
	function handle_body(self, this)
		if this == "_" then
			self.n.handler = handle_end
			return
		end
		if not match(this, "^%a$") then
			return self:fail_route()
		end
		self:emit(this)
	end
	
	-- Handler for the character after a "_" in the name. A second "_" completes the magic word: if the name is in d.MAGIC_WORDS_CS as written (case sensitive), or in d.MAGIC_WORDS_NOT_CS once uppercased (case insensitive), the uppercased name is returned without popping the layer; otherwise the route fails. An ASCII letter means the "_" was part of the name, so both are emitted and the body handler resumes.
	function handle_end(self, this)
		if this ~= "_" then
			if not match(this, "^%a$") then
				return self:fail_route()
			end
			self:emit("_")
			self:emit(this)
			self.n.handler = handle_body
			return
		end
		local name = self:concat()
		local upper_name = uupper(name)
		if d.MAGIC_WORDS_CS[name] or d.MAGIC_WORDS_NOT_CS[upper_name] then
			return upper_name
		end
		return self:fail_route()
	end
	
	-- Route setup for a magic word: installs handle_start, sets `no_magic_word` on the layer (which makes Parser:magic_word a no-op while this layer is current) and advances the head once.
	function Parser:do_magic_word()
		self:set("handler", handle_start)
		self.n.no_magic_word = true
		self:advance()
	end
	
	-- Attempts the magic word route. If magic words are disabled on the current layer (`no_magic_word`) or the route fails, the current character is consumed as normal. Otherwise, the route's layer is popped (the handler returns the name without popping it) and the name is appended to the parser's `magic_words` list, which is created on first use and then kept, so that every magic word found in the parse is recorded.
	function Parser:magic_word()
		if self.n.no_magic_word then
			return self:consume()
		end
		local magic_word = self:get("do_magic_word")
		if magic_word == self.n.bad_route then
			return self:consume()
		end
		self:pop()
		-- The list lives on the parser itself, not on the current layer, so that is where the check for an existing list has to be made.
		local magic_words = self.magic_words
		if not magic_words then
			magic_words = {}
			self.magic_words = magic_words
		end
		insert(magic_words, magic_word)
	end
end

------------------------------------------------------------------------------------
--
-- Newline
--
------------------------------------------------------------------------------------

-- If a newline is found, the current layer is retained as the main layer for the current parse, but sublayers are used for each subsequent newline. This allows finalize_line to do line-by-line postprocessing (matching Parsoid), which can then be emitted to the main layer once finalised.
function Parser:newline()
	-- Remove already-emitted whitespace before end.
	local last = self:emitted()
	while last == " " or last == "\t" do
		self:remove()
		last = self:emitted()
	end
	self:finalize_line()
	-- If the finished line was on a sublayer, fold it into the layer underneath.
	if self.n.sublayer then
		local line = self:pop_sublayer()
		self:emit_tokens(line)
	end
	self:emit("\n")
	-- The next line gets a sublayer of its own.
	self:push_sublayer()
end

------------------------------------------------------------------------------------
--
-- Multipart
--
------------------------------------------------------------------------------------

-- Parses `data` repeatedly, one section per pass, until a pass returns a parser whose "end" field is set.
-- `data` is a table passed to Parser:parse; its route[2] is set to true and route[3] to the head position each pass should start from.
-- `on_fail` (optional) is alternative parse data used for a section when the parse of `data` is not ok.
-- Returns a Multipart node built from the list of sections if more than one section was parsed; otherwise returns the tokens of the single pass.
function Parser:multipart(data, on_fail)
	data.route[2] = true -- multipart
	data.route[3] = 1 -- head
	if on_fail then
		data.allow_fail = true
		on_fail.route[2] = true -- multipart
	end
	-- `parser` is overwritten by the first parse before it is read.
	local parser, ok, tokens, sections = 1
	while true do
		ok, tokens, parser = Parser:parse(data)
		if not ok then
			-- NOTE(review): on_fail is indexed unconditionally here; this presumably relies on Parser:parse only returning a not-ok result when data.allow_fail is set - confirm.
			on_fail.route[3] = data.route[3] -- head
			tokens, parser = select(2, Parser:parse(on_fail))
		end
		if parser["end"] then
			break
		end
		-- The next section starts after the position at which this one stopped.
		data.route[3] = parser.head + 1
		sections = sections or {}
		insert(sections, tokens)
	end
	if sections then
		-- Add the final section, which ended the loop.
		insert(sections, tokens)
		return Multipart:new(sections)
	end
	return tokens
end

------------------------------------------------------------------------------------
--
-- Percent-encoding
--
------------------------------------------------------------------------------------

-- If decoding fails, this will normally cause the containing wikilink to fail, since any bytes decoded up to that point would decode to an invalid UTF-8 sequence on their own, which is invalid anywhere in a link. However, if decoding fails on the leading byte due to an invalid raw character, then the wikilink will not fail, because the link will not contain any valid percent-encodings. e.g. [[%0G]] is a valid link, but [[%C2%0G]] and [[foo#%80]] will both fail, since "%C2" must have a trailing byte and "%80" can't be a leading byte in UTF-8.

do
	-- Handlers.
	local handle_leading_byte
	local handle_trailing_byte
	local handle_digit
	
	-- Handler for the first percent-encoded byte of a character.
	-- Parses one "%XX" via the "do_digit" route, rejects values that cannot start a UTF-8 sequence, then either finishes (single-byte character) or switches to handle_trailing_byte with the expected sequence length stored in self.n.num.
	function handle_leading_byte(self)
		-- Note: this local shadows the file-level `byte` (string.byte) inside the function.
		local byte = self:get("do_digit")
		if self.n.bad_route then
			-- Read by Parser:percent_encoding, which returns a literal "%" instead of nil when this flag is set on the failed route.
			self.n.no_fail_wikilink = true
			return self:fail_route()
		elseif (
			-- 0x80-0xC1 and 0xF5-0xFF can never be the first byte of a UTF-8 sequence.
			byte.val > 0x7F and byte.val < 0xC2 or
			byte.val > 0xF4
		) then
			return self:fail_route()
		end
		self:emit_tokens(byte)
		if byte.val < 0x80 then
			-- ASCII: the character is complete.
			self.n.char = char(byte.val)
			return self:pop()
		end
		self.n.bytes = {byte.val}
		-- Total number of bytes in the sequence, determined by the leading byte.
		self.n.num = byte.val < 0xE0 and 2 or byte.val < 0xF0 and 3 or 4
		self.n.handler = handle_trailing_byte
	end
	
	-- Handler for each subsequent percent-encoded byte of a multi-byte character.
	-- Fails the route unless the next input is a valid "%XX" continuation byte; once self.n.num bytes have been collected, stores the decoded character in self.n.char and pops the layer.
	function handle_trailing_byte(self, this)
		if this ~= "%" then
			return self:fail_route()
		end
		-- Note: this local shadows the file-level `byte` (string.byte) inside the function.
		local byte = self:get("do_digit")
		if (
			byte == self.n.bad_route or
			-- Continuation bytes must be in the range 0x80-0xBF.
			byte.val < 0x80 or
			byte.val > 0xBF or
			-- The second byte is restricted further after certain leading bytes, which excludes overlong encodings (E0, F0), surrogates (ED) and values above U+10FFFF (F4).
			#self.n.bytes == 1 and (
				self.n.bytes[1] == 0xE0 and byte.val < 0xA0 or
				self.n.bytes[1] == 0xED and byte.val > 0x9F or
				self.n.bytes[1] == 0xF0 and byte.val < 0x90 or
				self.n.bytes[1] == 0xF4 and byte.val > 0x8F
			)
		) then
			return self:fail_route()
		end
		self:emit_tokens(byte)
		insert(self.n.bytes, byte.val)
		if #self.n.bytes == self.n.num then
			self.n.char = char(unpack(self.n.bytes))
			return self:pop()
		end
	end
	
	-- Handler for the two hexadecimal digits following "%".
	-- Each digit is emitted; after the second one, the numeric value of the pair is stored in self.n.val and the layer is popped. Any non-hexadecimal character fails the route.
	function handle_digit(self, this)
		if match(this, "^%x$") then
			self:emit(this)
			local count = self.n.i + 1
			self.n.i = count
			if count ~= 2 then
				return
			end
			-- Skip the leading "%" (token 1) when reading the digits back.
			self.n.val = tonumber(self:concat(2), 16)
			return self:pop()
		end
		return self:fail_route()
	end
	
	-- Route entry point for a percent-encoded character: installs handle_leading_byte as the handler. The head is not advanced here; each byte is parsed by a nested "do_digit" route.
	function Parser:do_percent_encoding()
		self:set("handler", handle_leading_byte)
	end
	
	-- Route entry point for a single "%XX" byte: installs handle_digit, resets the digit counter, emits the "%" and advances to the first digit.
	function Parser:do_digit()
		self:set("handler", handle_digit)
		-- Number of hexadecimal digits read so far.
		self.n.i = 0
		self:emit("%")
		self:advance()
	end
	
	-- Attempts to parse a percent-encoded character at the current position.
	-- Returns a PercentEncoding node on success. On failure, returns "%" if the failed route was flagged with no_fail_wikilink, and nil otherwise.
	function Parser:percent_encoding()
		local encoded = self:get("do_percent_encoding")
		if encoded ~= self.n.bad_route then
			-- Keep the raw "%XX..." text alongside the decoded character.
			encoded.code = concat(encoded)
			return PercentEncoding:new(encoded)
		end
		if self.n.bad_route.no_fail_wikilink then
			return "%"
		end
		return nil
	end
end

------------------------------------------------------------------------------------
--
-- Strip marker
--
------------------------------------------------------------------------------------

do
	local unstrip_nowiki = mw.text.unstripNoWiki
	
	-- Handlers.
	local handle_prefix
	local handle_tag
	local handle_hex_code
	local handle_dec_code
	local handle_suffix
	
	-- Handler for the fixed opening sequence of a strip marker (the 9 characters after the initial "\127").
	-- Each character must match the expected one at its position; after the last, handle_tag takes over.
	function handle_prefix(self, this)
		local pos = self.n.i + 1
		self.n.i = pos
		if this == sub("'\"`UNIQ--", pos, pos) then
			self:emit(this)
			if pos == 9 then
				self.n.handler = handle_tag
			end
			return
		end
		return self:fail_route()
	end
	
	-- Handler for the tag name of a strip marker, which consists of lowercase letters and is terminated by "-".
	-- Once terminated, the tag determines whether a hexadecimal or decimal code is expected next; unknown tags fail the route.
	function handle_tag(self, this)
		if this ~= "-" then
			if not match(this, "^%l$") then
				return self:fail_route()
			end
			self:emit(this)
			return
		end
		-- The tag name starts at token 11, after "\127" and the 9-character prefix.
		self.n.tag = self:concat(11)
		self:emit("-")
		if d.STRIP_MARKERS_HEX[self.n.tag] then
			self.n.i = 0
			self.n.handler = handle_hex_code
			return
		end
		if d.STRIP_MARKERS_DEC[self.n.tag] then
			self.n.handler = handle_dec_code
			return
		end
		return self:fail_route()
	end
	
	-- Handler for the hexadecimal code of a strip marker.
	-- The code must be exactly 8 uppercase hexadecimal digits (counted in self.n.i) followed by "-", after which handle_suffix checks the closing sequence. Anything else fails the route.
	function handle_hex_code(self, this)
		if this == "-" then
			if self.n.i ~= 8 then
				return self:fail_route()
			end
			self:emit("-")
			-- Ends -QINU`\"'\127 (one dash).
			-- The dash just emitted is the first suffix character, so the suffix position starts at 1.
			self.n.i = 1
			self.n.handler = handle_suffix
		-- Only 0-9 and A-F are hexadecimal digits; "%u" also accepted G-Z, letting malformed markers through.
		elseif match(this, "^[%dA-F]$") then
			self:emit(this)
			self.n.i = self.n.i + 1
		else
			return self:fail_route()
		end
	end
	
	-- Handler for the decimal code of a strip marker: any number of digits, terminated by "-".
	function handle_dec_code(self, this)
		if this ~= "-" then
			if match(this, "^%d$") then
				self:emit(this)
				return
			end
			return self:fail_route()
		end
		self:emit("-")
		-- Ends --QINU`\"'\127 (two dashes), so the whole suffix (including its own dash) is still to come.
		self.n.i = 0
		self.n.handler = handle_suffix
	end
	
	-- Handler for the fixed closing sequence of a strip marker.
	-- Each character must match the expected one at its position; the layer is popped after the final "\127".
	function handle_suffix(self, this)
		local pos = self.n.i + 1
		self.n.i = pos
		if this == sub("-QINU`\"'\127", pos, pos) then
			self:emit(this)
			if pos == 9 then
				return self:pop()
			end
			return
		end
		return self:fail_route()
	end
	
	-- Route entry point for a strip marker: installs handle_prefix, resets the position counter, emits the opening "\127" and advances to the next character.
	function Parser:do_strip_marker()
		self:set("handler", handle_prefix)
		-- Position within the fixed prefix string checked by handle_prefix.
		self.n.i = 0
		self:emit("\127")
		self:advance()
	end
	
	-- Attempts to parse a strip marker at the current position.
	-- Returns nil if the route fails, otherwise a StripMarker node. For "nowiki" markers, the marker is unstripped and its content parsed with export.parse_nowiki, and the resulting tokens replace the marker's own.
	function Parser:strip_marker()
		local strip_marker = self:get("do_strip_marker")
		if strip_marker == self.n.bad_route then
			return nil
		elseif strip_marker.tag == "nowiki" then
			-- Preserve the head position of the original layer, since the parsed replacement comes from a separate parse.
			local head = strip_marker.head
			strip_marker = export.parse_nowiki(unstrip_nowiki(concat(strip_marker)))
			strip_marker.tag = "nowiki"
			strip_marker.head = head
		end
		return StripMarker:new(strip_marker)
	end
end

------------------------------------------------------------------------------------
--
-- Wikilink
--
------------------------------------------------------------------------------------

do
	-- Returns true if `this` cannot appear in a link target: it is nil/false, the empty string, U+FFFD, or matches `pattern`. Returns false otherwise.
	local function is_invalid_target(this, pattern)
		if not this or this == "" then
			return true
		end
		-- U+FFFD Replacement Character
		if this == "\239\191\189" then
			return true
		end
		return match(this, pattern) ~= nil
	end
	
	do
		local handle_target_decoding
		local handle_target
		local handle_target_whitespace
		local handle_target_escape
		local handle_capitalizer
		local handle_multipart
		local handle_end_after_target
		local handle_default_display_text
		local handle_after_pipe
		local handle_rsqb_after_pipe
		local handle_text
		local handle_text_after_newline
		local handle_end_after_text
		local handle_end_after_extra_rsqb
		local handle_trail
	
		-- Resolves percent-encodings and HTML entities in a link target.
		-- Returns two values: the token to emit, and its decoded string form (used for validity checks). Returns nothing/nil when decoding fails in a way that should fail the link.
		function handle_target_decoding(self, this)
			if this == "%" then
				this = self:percent_encoding()
				if type(this) == "table" then
					-- Run the decoded character through again.
					return handle_target_decoding(self, rawstring(this))
				end
				-- Either a literal "%" or nil (failure).
				return this, this
			elseif this == "&" then
				this = self:html_entity(true)
				if this == false then
					return nil
				end
			end
			-- If html_entity returned nil, `this` falls back to a literal "&".
			return this or "&", type(this) == "table" and rawstring(this) or this or "&"
		end
		
		-- Main handler for the target part of a wikilink (first pass).
		-- Dispatches on structural characters (apostrophes, backslash escapes, "]" and "|" for embedded links, "/" and end-of-input for unembedded links), then validates and emits ordinary target characters after decoding.
		function handle_target(self, this)
			if this == "'" then
				this = self:apostrophes()
				-- Remember whether any style apostrophes were found.
				self.n.apos = self.n.apos or this and true or nil
				self:emit(this or "'")
				return
			elseif this == "\\" then
				self.n.override = handle_target_escape
				return
			elseif this == "^" then
				-- No return: the "^" itself continues to the validation and emission below.
				self.n.override = handle_capitalizer
			elseif self.unembedded_link then
				if this == "/" then
					self.n.override = handle_multipart
					return
				elseif this == "" then
					-- End of input ends an unembedded link.
					local ret = handle_default_display_text(self, true)
					if ret then
						return ret
					end
					self["end"] = true
					return self:pop()
				end
			-- Only if not self.unembedded_link.
			elseif this == "]" then
				-- An empty target is invalid.
				if #self.n == 0 then
					return self:fail_route()
				end
				self.n.handler = handle_end_after_target
				return
			elseif this == "|" then
				if #self.n == 0 then
					return self:fail_route()
				end
				local wikilink = self:wikilink_target(Wikitext:new(self:pop_sublayer(), true))
				if not wikilink then
					return self:fail_route()
				elseif wikilink.other then
					return self:pop()
				end
				self.n.handler = handle_after_pipe
				return
			end
			local decoded
			if self.n.fragment then
				-- "<" and ">" are valid as literals in fragments.
				-- In fragments, the raw character is validated before decoding.
				if is_invalid_target(this, "^[%z\1-\31%[%]{|}\127]$") then
					return self:fail_route()
				end
				this, decoded = handle_target_decoding(self, this)
				if not decoded then
					return self:fail_route()
				end
			else
				-- Outside fragments, the decoded character is validated.
				this, decoded = handle_target_decoding(self, this)
				if is_invalid_target(decoded, "^[%z\1-\31<>%[%]{|}\127]$") then
					return self:fail_route()
				end
			end
			if decoded == "#" then
				self:emit(this)
				self.n.fragment = true
			elseif d.BIDI[decoded] then
				-- Characters listed in d.BIDI are dropped.
				return
			elseif d.WIKILINK_SPACE[decoded] then
				self:emit(this)
				self.n.override = handle_target_whitespace
			else
				self:emit(this)
			end
		end
		
		-- Override handler active after a spacing character in the target.
		-- Plain spaces are dropped and other spacing characters are emitted; the first non-spacing character removes the override and is consumed normally.
		function handle_target_whitespace(self, this)
			if this == " " then
				return
			end
			if d.WIKILINK_SPACE[this] then
				self:emit(this)
				return
			end
			self.n.override = nil
			return self:consume()
		end
		
		-- Override handler for the character following a backslash in the target.
		-- The escaped character is emitted as-is; the backslash itself is only kept before "#", ":" and "\", so that the second pass can still see the escape. At the end of input, the character is consumed normally instead.
		function handle_target_escape(self, this)
			self.n.override = nil
			if this == "" then
				return self:consume()
			end
			-- Retain escape for second pass.
			local keep_escape = this == "#" or this == ":" or this == "\\"
			if keep_escape then
				self:emit("\\")
			end
			self:emit(this)
		end
		
		-- Override handler for the character following "^" in the target.
		-- Not yet implemented: currently it only removes the override, so the character passed in is neither emitted nor consumed.
		function handle_capitalizer(self, this)
			self.n.override = nil
			-- TODO
		end
		
		-- Override handler for the character following "/" in an unembedded link target.
		-- A second "/" ends the current link (without setting self["end"]); otherwise the first "/" is emitted as a literal and the current character is consumed normally.
		function handle_multipart(self, this)
			self.n.override = nil
			if this == "/" then
				local ret = handle_default_display_text(self, true)
				-- A non-nil result means the target was invalid or the link is already complete.
				if ret then
					return ret
				end
				return self:pop()
			else
				self:emit("/")
				return self:consume(this)
			end
		end
		
		-- Handler for the character following the first "]" after a target with no pipe.
		-- Requires a second "]", processes the target (using it as the default display text), then pushes the display text onto the layer stack with handle_trail as its handler so that any trail is appended to it.
		function handle_end_after_target(self, this)
			if this ~= "]" then
				return self:fail_route()
			end
			local ret = handle_default_display_text(self)
			if ret then
				return ret
			end
			local display = self.n.display
			-- Push self.n.display onto the stack for the trail.
			display.handler = handle_trail
			display.head = self.head
			display.route = handle_trail
			local len = self.len + 1
			self[len] = display
			self.n = display
			self.len = len
		end
		
		-- Finishes a target that has no explicit display text: runs the second pass on the target and stores the raw target as the display text in self.n.display.
		-- `unembedded_link` is passed through to Parser:wikilink_target.
		-- Returns the result of fail_route() if the target is invalid, or of pop() if the link is a file/category (wikilink.other is set); otherwise returns nothing.
		function handle_default_display_text(self, unembedded_link)
			local raw_display = self:pop_sublayer()
			-- Generate the target using a clone of raw_display, in case it gets trashed.
			local wikilink = self:wikilink_target(
				Wikitext:new({unpack(raw_display)}, true),
				unembedded_link
			)
			if not wikilink then
				return self:fail_route()
			elseif wikilink.other then
				return self:pop()
			end
			self.n.display = Wikitext:new(raw_display, true)
			-- Style apostrophes are parsed before the trail is added.
			self:substitute_apostrophes(self.n.display)
		end
		
		-- Handler for the first character after the pipe.
		-- A "]" is held back (it may begin the closing "]]"); anything else starts the display text in a new sublayer and is consumed by handle_text.
		function handle_after_pipe(self, this)
			if this ~= "]" then
				self:push_sublayer(handle_text)
				return self:consume()
			end
			self.n.handler = handle_rsqb_after_pipe
		end
		
		-- Handler for the character following a "]" that came directly after the pipe.
		-- A second "]" (i.e. "|]]") fails the route; otherwise the held-back "]" becomes the first character of the display text and the current character is consumed by handle_text.
		function handle_rsqb_after_pipe(self, this)
			if this == "]" then
				return self:fail_route()
			end
			self:push_sublayer(handle_text)
			self:emit("]")
			return self:consume()
		end
		
		-- Note: except for trails, sortkeys are parsed like display text, since Parsoid parses them before doing the category logic.
		-- Main handler for the text after the pipe (display text or sortkey).
		-- Dispatches on the current character to the relevant sub-parser, emits ordinary characters, and fails the route if the input ends before the link is closed.
		function handle_text(self, this)
			if this == "\n" then
				self:newline()
				self.n.override = handle_text_after_newline
			elseif this == "&" then
				self:emit(self:html_entity() or "&")
			elseif this == "'" then
				this = self:apostrophes()
				-- Remember whether any style apostrophes were found.
				self.n.apos = self.n.apos or this and true or nil
				self:emit(this or "'")
			elseif this == "<" then
				self:html_tag()
			elseif this == "[" then
				-- "[[" inside the text fails the link, except (eventually) in files.
				if self.n.len > 0 and self:emitted() == "[" then
					if self.n.other == "file" then
						-- TODO
					else
						return self:fail_route()
					end
				end
				-- A "[" has been seen, so a following run of "]" may need the extra bracket handling below.
				self.n.extra_rsqb = true
				self:emit("[")
			elseif this == "]" then
				self.n.handler = handle_end_after_text
				if self.n.extra_rsqb then
					-- Try the "do_wikilink_end_after_extra_rsqb" route; if it succeeds, its tokens are emitted as text and parsing continues from there.
					local end_of_text = self:get("do_wikilink_end_after_extra_rsqb")
					if end_of_text ~= self.n.bad_route then
						self:emit_tokens(end_of_text)
						return self:consume()
					end
				end
			elseif this == "{" then
				-- TODO: table
			elseif this == "|" and self.n.other == "file" then
				-- TODO
			elseif this == "\127" then
				-- NOTE(review): an invalid strip marker is replaced by "?" - presumably mirroring the reference parser; confirm.
				self:emit(self:strip_marker() or "?")
			elseif this == "" then
				return self:fail_route()
			else
				self:emit(this)
			end
		end
		
		-- Override handler for the start of a new line within link text.
		-- Leading spaces and tabs are skipped. The first other character removes the override: "-" and "=" are tried as a horizontal rule and a heading respectively, and anything else is consumed normally.
		function handle_text_after_newline(self, this)
			if this ~= " " and this ~= "\t" then
				self.n.override = nil
				if this == "-" then
					self:horizontal_rule()
					return
				end
				if this == "=" then
					self:heading()
					return
				end
				return self:consume()
			end
		end
		
		-- Handler for the character following a "]" in the link text.
		-- A second "]" closes the link: the line is finalized, and then either the text is stored as a category sortkey and the layer popped, or handle_trail takes over. Otherwise the "]" was literal text, and handle_text resumes.
		function handle_end_after_text(self, this)
			if this == "]" then
				-- Style apostrophes are parsed before the trail is added.
				-- This is (bizarrely) even applied to sortkeys.
				self:finalize_line()
				if self.n.other == "category" then
					-- The sublayer is popped before self.n is indexed for the assignment.
					local sortkey = Wikitext:new(self:pop_sublayer())
					self.n.sortkey = sortkey
					return self:pop()
				end
				self.n.handler = handle_trail
			else
				self:emit("]")
				self.n.handler = handle_text
				return self:consume()
			end
		end
		
		-- Handler for the "do_wikilink_end_after_extra_rsqb" route, which checks that the "]" already emitted is followed by two more.
		-- The layer is popped on the second "]"; any other character fails the route. Nothing is emitted here.
		function handle_end_after_extra_rsqb(self, this)
			if this ~= "]" then
				return self:fail_route()
			end
			local count = self.n.i + 1
			self.n.i = count
			if count == 2 then
				return self:pop()
			end
		end
		
		-- Handler for the link trail: ASCII letters directly after the closing "]]" are appended to the display text.
		-- The first other character ends the link: the display text is stored, the head is moved back one place so that the character gets re-read, and the layer is popped.
		function handle_trail(self, this)
			if match(this, "^%a$") then
				self:emit(this)
				return
			end
			-- The sublayer has to be popped before self.n is indexed for the assignment, so this is done in two steps.
			local display_text = Wikitext:new(self:pop_sublayer())
			self.n.display = display_text
			self:advance(-1)
			return self:pop()
		end
		
		-- Route entry point for a wikilink: installs handle_target, optionally moves the head to `head`, and opens a sublayer in which the target is collected.
		function Parser:do_wikilink(head)
			self:set("handler", handle_target)
			self.head = head or self.head
			self:push_sublayer()
		end
		
		-- Route entry point used by handle_text when a "]" follows text containing "[": installs handle_end_after_extra_rsqb, resets its counter, emits the current "]" and advances.
		function Parser:do_wikilink_end_after_extra_rsqb()
			self:set("handler", handle_end_after_extra_rsqb)
			-- Number of further "]" characters seen.
			self.n.i = 0
			self:emit("]")
			self:advance()
		end
	end
	
	-- Second pass over wikilink target:
		-- Get normalized prefixes: capitalization is ignored, and spacing characters + "_" become spaces.
		-- Get any fragment.
		-- Check for the colon trick.
		-- Ignore style apostrophes.
	do
		local handle_target_decoding_2
		local handle_prefix
		local handle_target_2
		local handle_target_escape_2
		local handle_category
		
		-- Normalizes a first-pass token for the second pass.
		-- Strings are returned unchanged. Apostrophe nodes are reported as the string "apostrophes". Any other node has its `char` value spliced into self.text in place of the node, and the first element is returned.
		function handle_target_decoding_2(self, this)
			if type(this) == "table" then
				if this.type == "apostrophes" then
					return "apostrophes"
				end
				-- Replace HTML entities and percent-encoding with the relevant characters.
				-- NOTE(review): `decoded` is indexed numerically, so this assumes `char` is an array of characters by this point - confirm against the node constructors.
				local decoded, pos = this.char, self.head
				self.text[pos] = decoded[1]
				-- Any further elements are inserted after the current position, to be read on subsequent iterations.
				for i = 2, #decoded do
					pos = pos + 1
					insert(self.text, pos, decoded[i])
				end
				return decoded[1]
			end
			return this
		end
		
		-- Handler for a potential prefix (namespace or interwiki) at the start of the target.
		-- Characters are normalized (lowercased, with runs of spacing characters collapsed to a single space and trimmed from the ends) and emitted until ":" is reached, at which point the collected text is looked up as a namespace, then as an interwiki prefix. The route fails if the input ends, a style apostrophe or invalid single-byte character is found, or the text is not a known prefix.
		-- On success, the layer is popped with prefix_type, normalized and str set on it. An empty prefix (a leading ":") is popped with none of them set.
		function handle_prefix(self, this)
			if this == "" then
				return self:fail_route()
			end
			this = handle_target_decoding_2(self, this)
			if this == "apostrophes" then
				return self:fail_route()
			elseif this == ":" then
				if #self.n == 0 then
					return self:pop()
				end
				local raw_prefix = concat(self.n)
				local prefix = load_data("Module:data/namespaces")[raw_prefix]
				if prefix then
					self.n.prefix_type = "namespace"
					-- Normalize namespace.
					if raw_prefix == prefix then
						self.n.normalized = self.n
					else
						self.n.normalized = Wikitext:new(explode(prefix))
					end
					self.n.str = prefix
					return self:pop()
				end
				local prefix_type = load_data("Module:data/interwikis")[raw_prefix]
				if not prefix_type then
					return self:fail_route()
				else
					self.n.prefix_type = prefix_type
					self.n.normalized = self.n
					-- `prefix` is always nil in this branch (the namespace lookup failed), so assigning it left str unset; the prefix as written is its own normalized form here.
					self.n.str = raw_prefix
					return self:pop()
				end
			-- Don't emit spaces at the start or end.
			elseif d.WIKILINK_SPACE[this] then
				if self.n.can_emit_space then
					self.n.do_emit_space = true
				end
			else
				-- Emit a single pending space now that a non-space character follows it.
				if self.n.do_emit_space then
					self:emit(" ")
					self.n.do_emit_space = nil
				end
				if #this == 1 then
					-- Single-byte characters must be alphanumeric.
					if not match(this, "^%w$") then
						return self:fail_route()
					end
					self:emit(lower(this))
				else
					-- Multi-byte characters need Unicode-aware lowercasing.
					self:emit(ulower(this))
				end
				self.n.can_emit_space = true
			end
		end
		
		-- Main handler for the remainder of the target in the second pass (after any prefixes).
		-- Collects the title up to the first "#", then the fragment; at the end of input, the collected sublayer is stored as the title (or as the fragment, if a title is already set) and the layer is popped. Style apostrophes are ignored.
		function handle_target_2(self, this)
			if this == "\\" then
				self.n.override = handle_target_escape_2
				return
			elseif this == "" then
				-- The sublayer is popped before self.n is indexed for the assignment.
				local layer = Wikitext:new(self:pop_sublayer())
				self.n[self.n.title and "fragment" or "title"] = layer
				return self:pop()
			end
			this = handle_target_decoding_2(self, this)
			if this == "apostrophes" then
				return
			elseif not self.n.title then
				if this == "#" then
					-- End of the title; the "#" itself is not emitted, and a new sublayer is opened for the fragment.
					local title = Wikitext:new(self:pop_sublayer())
					self.n.title = title
					self:push_sublayer()
					return
				end
				-- TODO: add a title length counter and fail if too long
				if this == "%" then
					-- TODO: check for percent-encoding format
				elseif this == "&" then
					-- TODO: check for HTML entity format
				elseif this == "." then
					-- TODO: check for dot slash notation
				elseif this == "/" then
					-- TODO: ditto
				elseif this == "~" then
					-- TODO: check for 3+ consecutive tildes
				end
			end
			self:emit(this)
		end
		
		-- Override handler for the character following a backslash in the second pass: the character is emitted literally (without the backslash), bypassing handle_target_2's special handling.
		function handle_target_escape_2(self, this)
			self.n.override = nil
			self:emit(this)
		end
		
		-- Handler for the remainder of the target after a file or category prefix.
		-- Everything is emitted literally until the end of input: apostrophe nodes are expanded back into `num` literal "'" characters, and other nodes are replaced by their raw string. At the end of input, the collected sublayer is stored as the title and the layer is popped.
		-- Declared local here because the forward declarations at the top of this block only reserve "handle_category"; a plain "function handle_file_or_category" therefore assigned a global. The only use of this function (in Parser:do_wikilink_2) comes later in the same scope.
		local function handle_file_or_category(self, this)
			if this == "" then
				-- The sublayer is popped before self.n is indexed for the assignment.
				local layer = Wikitext:new(self:pop_sublayer())
				self.n.title = layer
				return self:pop()
			elseif type(this) == "table" then
				if this.type == "apostrophes" then
					for _ = 1, this.num do
						self:emit("'")
					end
					return
				end
				this = rawstring(this)
			end
			self:emit(this)
		end
		
		-- Route entry point for a single prefix: installs handle_prefix as the handler of the current layer. The head is left where it is.
		function Parser:do_prefix()
			self.n.handler = handle_prefix
		end
		
		-- Route entry point for the second pass over a wikilink target.
		-- Repeatedly tries the "do_prefix" route to strip leading prefixes, tracking the colon trick and consecutive colons, until no further prefix is found or a namespace is reached. Then hands over to handle_file_or_category (for file and category links) or handle_target_2 (for everything else), collecting the rest of the target in a new sublayer.
		-- `unembedded_link` stops a category prefix from being treated as a categorization.
		function Parser:do_wikilink_2(unembedded_link)
			-- `colons` tracks the number of consecutive colons just seen (1 after a prefix or leading colon, 2 after an additional empty prefix).
			local colons, prefix, prefixes, prev_prefix_type = 0
			while true do
				prefix = self:get("do_prefix")
				if prefix == self.n.bad_route then
					-- Not a prefix: the rest is the title.
					break
				elseif not prefixes then
					if prefix.len == 0 then
						-- Leading colon; a second one in a row is invalid.
						if colons == 1 then
							return self:fail_route()
						end
						self.n.colon_trick = true
					elseif prefix.prefix_type == "current" then
						self.n.colon_trick = true
					else
						prefixes = Prefix:new{}
						insert(prefixes, prefix.normalized)
						prev_prefix_type = prefix.prefix_type
					end
					colons = 1
				elseif #prefixes == 1 and prefix.len == 0 then
					-- A double colon after the first prefix is only permitted once, and only after a "local" or "external" prefix.
					if (
						colons == 2 or
						colons == 1 and not (
							prev_prefix_type == "local" or
							prev_prefix_type == "external"
						)
					) then
						return self:fail_route()
					end
					colons = 2
				elseif prefix.len > 0 then
					insert(prefixes, prefix.normalized)
					colons = 1
					prev_prefix_type = prefix.prefix_type
				end
				self:advance()
				-- Category prefix in an unembedded link always links to the category.
				if not self.n.colon_trick and (
					prefix.str == "file" or 
					prefix.str == "category" and not unembedded_link
				) then
					self.n.handler = handle_file_or_category
					self.n.other = prefix.str
					self:push_sublayer()
					return
				elseif prefix.prefix_type == "namespace" then
					-- Nothing after a namespace is treated as a prefix.
					break
				end
			end
			self.n.prefix = prefixes
			self.n.handler = handle_target_2
			self:push_sublayer()
		end
		
		-- Runs the second pass over `target` (the tokens collected in the first pass) using a separate parser, and copies the results onto the current layer.
		-- `unembedded_link` is passed through to the "do_wikilink_2" route.
		-- Returns the second-pass layer, or nil if the target is invalid. self.n.title is always set; self.n.other is set for file/category links, and colon_trick, prefix and fragment for all other links.
		function Parser:wikilink_target(target, unembedded_link)
			if self.n.apos then
				-- Resolve style apostrophes in the target before it is parsed.
				self:handle_odd_number_italics_and_bold(target)
				self.n.apos = nil
			end
			local parser = Parser:new(target)
			local wikilink = parser:get("do_wikilink_2", unembedded_link)
			-- NOTE(review): this reads bad_route from the parser itself rather than from its current layer (parser.n), unlike the other routes in this file - confirm that is intended for a top-level route.
			if wikilink == parser.bad_route then
				return nil
			end
			self.n.title = wikilink.title
			if wikilink.other then
				self.n.other = wikilink.other
			else
				self.n.colon_trick = wikilink.colon_trick
				self.n.prefix = wikilink.prefix
				self.n.fragment = wikilink.fragment
			end
			return wikilink
		end
	end
	
	-- Attempts to parse a wikilink, optionally starting from `head`.
	-- On success, emits a Category node (for categorizations) or a Wikilink node. On failure, emits the opening "[[" as literal text and advances.
	function Parser:wikilink(head)
		local link = self:get("do_wikilink", head)
		if link == self.n.bad_route then
			for _ = 1, 2 do
				self:emit("[")
			end
			self:advance()
			return
		end
		if link.other == "category" then
			self:emit(Category:new(link))
		else
			self:emit(Wikilink:new(link))
		end
	end
	
	do
		-- Traversal loop used when parsing link templates.
		-- Reads characters one at a time, dispatching carriage returns and "<" (for comments) to their own methods and everything except the null character to self:consume, until one of them returns a layer. The head is then moved back one place and the layer is returned.
		local function traverse_link_template(self)
			local this, layer
			repeat
				this = self:read()
				if this == "\r" then
					layer = self:carriage_return("\r")
				elseif this == "<" then
					layer = self:comment()
				-- Null characters are skipped.
				elseif this ~= "\0" then
					layer = self:consume(this)
				end
				self:advance()
			until layer
			-- Undo the final advance.
			self:advance(-1)
			return layer
		end
		
		-- Route entry point for a link template section: selects the link template traversal, marks the parser as handling an unembedded link (a link target given without surrounding brackets), sets the head to `head`, then continues as an ordinary wikilink route.
		-- The first argument is unused (Parser:multipart sets it to true).
		function Parser:do_link_template(_, head)
			self.traverse = traverse_link_template
			self.unembedded_link = true
			self.head = head
			return self:do_wikilink()
		end
	end
	
	-- Parses `str` as the input to a link template: each section is parsed as an unembedded wikilink, falling back to the default wikitext route if that fails.
	-- Returns the result of Parser:multipart.
	function export.parse_link_template(str)
		local chars = explode(str)
		local link_data = {
			text = chars,
			node = {Wikilink},
			route = {"do_link_template"}
		}
		-- Used for any section that is not a valid link.
		local fallback_data = {
			text = chars,
			node = {Wikitext},
			route = {"do_default"}
		}
		return Parser:multipart(link_data, fallback_data)
	end
end

------------------------------------------------------------------------------------
--
-- Parser
--
------------------------------------------------------------------------------------

do
	-- Handlers.
	local handle_plaintext
	local handle_plaintext_whitespace
	local handle_plaintext_after_newline
	local handle_multipart
	
	-- Main handler for the default route.
	-- Dispatches on the current character to the relevant sub-parser and emits ordinary characters. At the end of input, the line is finalized, self["end"] is set and the layer is popped.
	function handle_plaintext(self, this)
		if this == " " or this == "\t" then
			-- Emit the first whitespace character; handle_plaintext_whitespace drops the rest of the run.
			self:emit(this)
			self.n.override = handle_plaintext_whitespace
		elseif this == "\n" then
			self:newline()
			self.n.override = handle_plaintext_after_newline
		elseif this == "&" then
			self:emit(self:html_entity() or "&")
		elseif this == "'" then
			this = self:apostrophes()
			-- Remember whether any style apostrophes were found.
			self.n.apos = self.n.apos or this and true or nil
			self:emit(this or "'")
		elseif this == "/" and self.multi then
			-- Possible "//" section separator (multipart parses only).
			self.n.override = handle_multipart
		elseif this == ":" then
			self:free_external_link()
		elseif this == "<" then
			self:html_tag()
		elseif this == "I" or this == "P" or this == "R" then
			-- NOTE(review): presumably the initials of the magic link keywords (ISBN, PMID, RFC) - confirm against Parser:magic_link.
			self:magic_link(this)
		elseif this == "[" then
			self:bracketed_external_link()
		elseif this == "\127" then
			self:emit(self:strip_marker() or "?")
		elseif this == "" then
			self:finalize_line()
			self["end"] = true
			return self:pop()
		else
			self:emit(this)
		end
	end
	
	-- Override handler active after a space or tab: further spaces and tabs are dropped, and the first other character removes the override and is consumed normally.
	function handle_plaintext_whitespace(self, this)
		if this == " " or this == "\t" then
			return
		end
		self.n.override = nil
		return self:consume(this)
	end
	
	-- Override handler for the first character of a new line in the default route.
	-- Line-initial markup ("#", "*", ":", ";", "-", "=") is handled here; any other character is consumed normally. The override is removed in all cases, so only the first character of the line is treated this way.
	function handle_plaintext_after_newline(self, this)
		self.n.override = nil
		if this == "#" then
			self:emit(self.OrderedListMarker)
		elseif this == "*" then
			self:emit(self.UnorderedListMarker)
		elseif this == "-" then
			self:horizontal_rule()
		elseif this == ":" then
			self:emit(self.IndentationMarker)
		elseif this == ";" then
			self:emit(self.DescriptionListMarker)
			-- Flag that a description list term has been opened on this layer.
			self.n.dl = true
		elseif this == "=" then
			self:heading()
		else
			return self:consume(this)
		end
	end
	
	-- Override handler for the character following "/" in a multipart parse of the default route.
	-- A second "/" ends the current section: the line is finalized and the layer popped. Otherwise the first "/" is emitted as a literal and the current character is consumed normally.
	function handle_multipart(self, this)
		self.n.override = nil
		if this ~= "/" then
			self:emit("/")
			return self:consume(this)
		end
		self:finalize_line()
		return self:pop()
	end
	
	do
		-- Traversal loop used by the default route.
		-- Reads characters one at a time, dispatching carriage returns, "<" (for comments) and "_" (for magic words) to their own methods and everything except the null character to self:consume, until one of them returns a layer. The head is then moved back one place and the layer is returned.
		local function traverse_default(self)
			local this, layer
			repeat
				this = self:read()
				if this == "\r" then
					layer = self:carriage_return("\r")
				elseif this == "<" then
					layer = self:comment()
				elseif this == "_" then
					layer = self:magic_word()
				-- Null characters are skipped.
				elseif this ~= "\0" then
					layer = self:consume(this)
				end
				self:advance()
			until layer
			-- Undo the final advance.
			self:advance(-1)
			return layer
		end
		
		-- Route entry point for ordinary wikitext: selects the default traversal and installs handle_plaintext.
		-- If `multipart` is set, the parser is flagged as multipart (enabling "//" section separators) and the head is set to `head`.
		function Parser:do_default(multipart, head)
			self.traverse = traverse_default
			if multipart then
				self.multi = multipart
				self.head = head
			end
			self:set("handler", handle_plaintext)
		end
	end
	
	-- Parses `str` as wikitext using the default route.
	-- If `multipart` is set, the input is parsed in sections via Parser:multipart and its result returned; otherwise the second return value of Parser:parse is returned.
	function export.parse(str, multipart)
		local data = {
			text = explode(str),
			node = {Wikitext},
			route = {"do_default"}
		}
		if multipart then
			return Parser:multipart(data)
		end
		-- The outer parentheses truncate the result to a single value.
		return (select(2, Parser:parse(data)))
	end
end

do
	local handle_nowiki
	local handle_multipart
	
	-- Main handler for nowiki content, in which only carriage returns, HTML entities and (in multipart parses) "//" section separators are special.
	-- At the end of input, the line is finalized, self["end"] is set and the layer is popped. Null characters are skipped; every other character is emitted as-is.
	function handle_nowiki(self, this)
		if this == "\r" then
			return self:carriage_return("\r")
		elseif this == "&" then
			self:emit(self:html_entity() or "&")
		elseif this == "/" and self.multi then
			self.n.override = handle_multipart
		elseif this == "" then
			self:finalize_line()
			self["end"] = true
			return self:pop()
		-- This was previously compared against "/0" (a slash followed by a zero), which no single character can equal, so null characters were being emitted instead of skipped as they are by the traversal loops of the other routes.
		elseif this ~= "\0" then
			self:emit(this)
		end
	end
	
	-- Override handler for the character following "/" in a multipart nowiki parse.
	-- A second "/" ends the current section by popping the layer. Otherwise the first "/" is emitted as a literal and the current character is consumed normally.
	function handle_multipart(self, this)
		self.n.override = nil
		if this ~= "/" then
			self:emit("/")
			return self:consume(this)
		end
		return self:pop()
	end
	
	-- Route entry point for nowiki content: installs handle_nowiki.
	-- If `multipart` is set, the parser is flagged as multipart (enabling "//" section separators) and the head is set to `head`.
	function Parser:do_nowiki(multipart, head)
		if multipart then
			self.multi = multipart
			self.head = head
		end
		self:set("handler", handle_nowiki)
	end
	
	-- Parses `str` as nowiki content using the "do_nowiki" route, and returns the second return value of Parser:parse.
	function export.parse_nowiki(str)
		local data = {
			text = explode(str),
			node = {Wikitext},
			route = {"do_nowiki"}
		}
		-- The outer parentheses truncate the result to a single value.
		return (select(2, Parser:parse(data)))
	end
end

return export