User:Erutuon/scripts/addMacronsAndBreves.js

Note – after saving, you may have to bypass your browser’s cache to see the changes.

  • Mozilla / Firefox / Safari: hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Command-R on a Macintosh);
  • Konqueror and Chrome: click Reload or press F5;
  • Opera: clear the cache in Tools → Preferences;
  • Internet Explorer: hold Ctrl while clicking Refresh, or press Ctrl-F5.

/*
	Adds macrons and breves to links in Ancient Greek categories by retrieving
	them from the wikitext of entries. Very alpha.
*/

/* jshint esversion: 6, eqeqeq: true, varstmt: true, unused: true, undef: true */
/* globals mw, $ */
// <nowiki>

(function () {
	"use strict";
	
	const mwValues = mw.config.values;
	
	if ( mwValues.wgCanonicalNamespace === "Category" ) {
		// Should filter out Greek words with no ambiguous vowels.
		const grcLinks = $("[lang=grc]").filter(
			function () {
				return this.firstChild.tagName === "A";
			});
		
		if ( grcLinks.length === 0 ) {
			return;
		}
		
		const linkList = [];
		for (let i = 0; i < 30; ++i) {
			linkList.push(grcLinks[i].firstChild.text);
		}
		
		// Lookup table from spacing (standalone) length marks to the
		// combining characters that carry the same meaning.
		const spacingToCombiningMapping = {
			"\u00AF": "\u0304", // spacing macron
			"\u02C9": "\u0304", // modifier macron
			"\u02D8": "\u0306", // spacing breve
		};
		// Returns str with every spacing macron, modifier macron and spacing
		// breve swapped for its combining counterpart; all other characters
		// are left as they are.
		const spacingToCombiningCharacters = function (str) {
			const spacingMarks = /[\u00AF\u02C9\u02D8]/g;
			return str.replace(spacingMarks, (mark) => spacingToCombiningMapping[mark]);
		};
		
		// Sorting rank of each combining diacritic: lower ranks are placed
		// closer to the base letter.
		const diacriticOrder = {
			"\u0304": 1, // macron
			"\u0306": 1, // breve
			"\u0314": 2, // rough breathing
			"\u0313": 2, // smooth breathing
			"\u0308": 2, // diaeresis
			"\u0301": 3, // acute accent
			"\u0300": 3, // grave accent
			"\u0342": 3, // circumflex accent
			"\u0345": 4, // iota subscript
		};
		
		// Matches any run of two or more of the diacritics ranked above.
		const diacritics = Object.keys(diacriticOrder).join("");
		const diacriticSequenceRegex = new RegExp(`[${diacritics}]{2,}`, "g");
		
		// Based on reorderDiacritics in [[Module:grc-utilities]].
		// Decomposes str (NFD), sorts each run of known diacritics by rank,
		// then recomposes the result (NFC).
		const reorderDiacritics = function (str) {
			const byRank = (first, second) => diacriticOrder[first] - diacriticOrder[second];
			const sortRun = (run) => run.split("").sort(byRank).join("");
			
			const decomposed = str.normalize("NFD");
			const reordered = decomposed.replace(diacriticSequenceRegex, (run) => sortRun(run));
			return reordered.normalize("NFC");
		};
		
		// Scans wikitext for {{grc-IPA}} and the Ancient Greek headword
		// templates (noun, adjective, verb, adverb) and returns an array of
		// their first parameters, with spacing length marks converted to
		// combining ones and diacritics put in canonical order. Only
		// templates whose single parameter is directly followed by "}}" are
		// matched. The array is empty when nothing matches.
		// TODO: also look in |head= parameters and, if there's more than one
		// form, choose the one that's got the most macrons and breves.
		const getTermWithMacronsAndBreves = function (wikitext) {
			const templatePattern = /{{(?:grc-IPA|grc-(?:noun|adj-(?:1&2|2nd|1&3|3rd)|verb|adv(?:erb)?))\|([^|}]+)}}/g;
			const terms = [];
			for (
				let found = templatePattern.exec(wikitext);
				found !== null;
				found = templatePattern.exec(wikitext)
			) {
				const rawTerm = found[1];
				terms.push(reorderDiacritics(spacingToCombiningCharacters(rawTerm)));
			}
			return terms;
		};
		
		/*
			Build the API URL that requests the revision content (wikitext) of
			a list of titles. The titles are joined with pipes and then
			percent-encoded, so characters such as "&", "#" or "+" inside a
			title cannot break or truncate the query string.
			
			entryNames: array of page titles.
			Returns the protocol-relative URL as a string.
		*/
		const makeQueryURL = function (entryNames) {
			const titles = encodeURIComponent(entryNames.join("|"));
			// Plain concatenation rather than String.replace: a replacement
			// string would interpret "$&", "$1" etc. found in a title.
			return "//en.wiktionary.org/w/api.php?action=query&format=json&titles=" +
				titles +
				"&prop=revisions&rvprop=content";
		};
			
		// Fetch the wikitext of the collected entries, then replace the text
		// of each link with the first length-marked form found in its entry.
		// Links whose entry yields no form are left untouched.
		$.get(makeQueryURL(linkList))
		.then(function (data) {
			const pages = data && data.query && data.query.pages;
			if ( !pages ) {
				// A response without a page list (e.g. an API error) gives
				// nothing to work with.
				return;
			}
			
			// Prototype-less map from entry title to its array of forms, so
			// a link text such as "constructor" cannot hit an inherited
			// property.
			const lengthMarkedWords = Object.create(null);
			
			Object.keys(pages).forEach(function (pageId) {
				const page = pages[pageId];
				const revision = page.revisions && page.revisions[0];
				// Pages returned without revision content (such as titles
				// that do not exist) are skipped instead of aborting the
				// whole update.
				if ( revision && typeof revision["*"] === "string" ) {
					lengthMarkedWords[page.title] = getTermWithMacronsAndBreves(revision["*"]);
				}
			});
			
			grcLinks.find("a").each(function () {
				const terms = lengthMarkedWords[this.text];
				if ( terms && terms.length > 0 ) {
					// The replacement comes from user-editable wikitext, so
					// insert it as plain text rather than parsing it as HTML.
					this.textContent = terms[0];
				}
			});
		});
	}
} ());

// </nowiki>