MediaWiki:Gadget-catfixRegrouper-Data.js

Note: You may have to bypass your browser’s cache to see the changes. In addition, after saving a sitewide CSS file such as MediaWiki:Common.css, it will take 5-10 minutes before the changes take effect, even if you clear your cache.

  • Mozilla / Firefox / Safari: hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Command-R on a Macintosh);
  • Konqueror and Chrome: click Reload or press F5;
  • Opera: clear the cache in Tools → Preferences;
  • Internet Explorer: hold Ctrl while clicking Refresh, or press Ctrl-F5.


// {{documentation}}
// <nowiki>
/* jshint maxerr:1048576, strict:true, undef:true, latedef:true, es5:true */
/* global mw */
// Configuration and data for [[MediaWiki:Gadget-catfixRegrouper.js]].

/** REGROUPER_DATA_LANG data format:
 *      IMPORTANT! The regrouper already assumes that the language sort keys
 *      	are set up correctly. It does not protect against the same group
 *      	heading being repeated *AT ALL*. Do not enable the regrouper for
 *      	a language until sort keys are set up for it first (so that
 *          each would-be group is contiguous).
 *      The data is organized by language code.
 *      If the code is missing or the value is undefined, the
 * 			regrouper is disabled. This is the default.
 *      If it is ``true``, default values are used.
 *      Otherwise it is an object, with the following fields:
 * 			group
 * 				A function which is given the following parameters,
 * 					in this order:
 * 						- page title (minus namespace and language prefixes),
 * 						- language code,
 * 						- script code,
 * 						- namespace number.
 *					``this`` will be the language data object (so that you
 *					can add your own variables, functions, etc.; but if you
 *					do so, please prefix their names with x_).
 * 				It should return the group, i.e. the heading that the
 *	 			page should be categorized under. If the value returned
 *				is undefined, null or an empty string, its existing group
 *				will be kept.
 *				If ``group`` is omitted, the grouping function of the script
 *				is used if one is defined (see REGROUPER_DATA_SC below);
 *				otherwise the default grouping function (``defaultGroup``)
 *				is used; see below for a description thereof.
 *			detectScript
 * 				A function which is given the following parameters,
 * 					in this order:
 * 						- page title (minus namespace and language prefixes),
 * 						- language code,
 * 						- (default) script code,
 * 						- namespace number.
 *					``this`` will be the language data object (so that you
 *					can add your own variables, functions, etc.; but if you
 *					do so, please prefix their names with x_).
 * 				It should return a script code, or undefined to use
 * 				the default script.
 * 			initials
 * 				If the default grouping function is used, this can be a RegExp
 * 				of initial letters/digraphs/etc. which are automatically mapped
 * 				to that respective group (with titlecase conversion).
 * 			initialFallback
 * 				Only applies for the default grouping function and if initials
 * 				is defined. If ``true``, the default grouping function falls
 *				back to the default logic when the initial doesn't match any
 *				specified in initials; if ``false`` (default), it just returns
 *				``undefined`` (i.e. keeps the existing group).
 *			ignore
 *				Preceding characters to ignore instead of the default ones.
 *				This is in RegExp character class syntax.
 *			ignoreAdd
 *				Preceding characters to ignore in addition to the default ones.
 *				This is in RegExp character class syntax.
 * 			unsupported
 * 				If ``true``, unsupported titles are passed directly to
 * 				``group``. If ``false`` (default), they are ignored, and their
 * 				existing groups are kept.
 * 			dottedDotlessI
 * 				Used in case conversion; ``true`` means the language has both
 * 				dotted and dotless I as separate letters (like in Turkish),
 * 				and ``false`` (default) means it doesn't.
 * 
 * REGROUPER_DATA_SC data format:
 * 		This data is organized by script. Note that these configurations are
 * 		still only considered for languages that have regrouping enabled.
 * 
 * 		If missing, defaults are used. Else, an object may override:
 * 			group
 * 				A function that works just like group in REGROUPER_DATA_LANG.
 * 				If missing, the default grouping function is used.
 * 				The priority of grouping functions is:
 * 					- group in language data,
 * 					- group in script data,
 *					- default grouping.
 *				
 * The default grouping function:
 * 			Checks initials and initialFallback.
 * 				If there are initials, it matches them first.
 * 				Initial matching ignores certain preceding characters,
 * 				e.g. hyphens.
 * 				If an initial is found, the matching portion is converted
 * 				to title case with ``titleCase`` and returned.
 *				Otherwise, we fall back to 'fallback' logic only if
 * 				`initialFallback` is `true`, and else return `undefined`
 * 				to keep the existing group.
 * 			We may fall into the fallback logic:
 * 				remove preceding characters (e.g. hyphens),
 * 				take the first remaining Unicode character
 * 						(or the first character in general if none would remain
 * 						 from the previous step),
 * 				convert it to title case with ``titleCase`` and return it.
 */

// We only need to run in categories (a couple of other namespaces are allowed
// too, e.g. for debugging).
if ([14 /* Category */, 8 /* MediaWiki */].indexOf(mw.config.get('wgNamespaceNumber')) !== -1) {
	/**
	 * Constructor that exposes the regrouper configuration tables.
	 *
	 * The tables themselves are private to each instance; they are only
	 * reachable through the two accessors assigned on ``this``:
	 *   - getByLang(lang): entry of REGROUPER_DATA_LANG for a language code,
	 *   - getBySc(sc):     entry of REGROUPER_DATA_SC for a script code.
	 * Both return ``undefined`` when the table has no entry for the code.
	 */
	window.RegrouperMetadata = function() {
		// Per-language configuration, keyed by language code.
		var REGROUPER_DATA_LANG = {
			"et": {
				initials: /[ŠZŽÕÄÖÜ]/i,
			},
			"fi": {
				initials: /[Å]/i,
			},
			// TODO check with editors. 
			// "hu": {
			// 	initials: /(?:[ÁÉÍÓÖŐÚÜŰ]|C[sz]|Dzs?|[GLNT]y|Sz)/i,
			// },
		};

		// Per-script configuration, keyed by script code (currently empty).
		var REGROUPER_DATA_SC = {
		};

		/**
		 * Look up ``key`` in ``table``, considering own properties only.
		 *
		 * The tables are plain object literals, so a bare ``table[key]``
		 * would also return members inherited from Object.prototype (e.g.
		 * "constructor" or "toString"), which would be mistaken for
		 * configuration. Restricting the lookup to own keys makes every
		 * code without an explicit entry yield ``undefined``.
		 */
		function lookup(table, key) {
			return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
		}

		// Returns the language data for ``lang``, or undefined if none is configured.
		this.getByLang = function(lang) {
			return lookup(REGROUPER_DATA_LANG, lang);
		};

		// Returns the script data for ``sc``, or undefined if none is configured.
		this.getBySc = function(sc) {
			return lookup(REGROUPER_DATA_SC, sc);
		};
	};
}
//</nowiki>