Canonical interwiki prefixes/PMWTableToWikiTable.php
Appearance
This script takes the PMWTable generated by Chris G's botclasses/ParseMirroredWikiIndexBot.php and converts it to a wikitable. You can then put that in your MediaWiki:Interwiki-whitelist for use by InterwikiMap.
<?php
/**
* PMWTableToWikiTable.php
* By Leucosticte < https://www.mediawiki.org/wiki/User:Leucosticte >
* This script takes the PMWTable generated by ParseMirroredWikiIndex and converts it to a wiki
* table. You can then put that in your MediaWiki:Interwiki-whitelist for use by InterwikiMap.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
/*
 * Prefix-generation settings.
 *
 * $goodPrefixes:   a generated prefix is expected to contain one of these substrings;
 *                  otherwise "wiki" is appended to the end of it.
 * $forbiddenChars: characters removed from a wiki name while turning it into a prefix.
 * $blacklist:      prefixes to skip because they won't make it through the spam blacklist.
 */
$goodPrefixes = array( 'wiki', 'pedia' );
$forbiddenChars = array( ' ', ':', '&', '=' );
$blacklist = array( 'biosites.orgwiki' );
// Database connection settings
$host = 'localhost';
$dbUser = 'root';
$dbPass = 'REMOVED';
$dbName = 'parse_mirrored_wikiindex_bot';
// Maps each table name to the SQL file that defines it
$tables = array(
    'parsed_mirrored_wikiindex' => 'parsed-mirrored-wikiindex.sql',
);
// Connect and select $dbName in one step (the fourth constructor argument selects the
// database, so no separate select_db() call is needed).
// "new mysqli" always yields an object, so a failed connection has to be detected through
// connect_error; when mysqli is configured to throw (the default since PHP 8.1) the failure
// arrives as a mysqli_sql_exception instead. Both paths end with the same message.
try {
    $db = new mysqli( $host, $dbUser, $dbPass, $dbName );
} catch ( mysqli_sql_exception $e ) {
    die( 'Could not connect: ' . $e->getMessage() . "\n" );
}
if ( $db->connect_error ) {
    die( 'Could not connect: ' . $db->connect_error . "\n" );
}
/*
 * Statuses (all lowercase) of wikis that will NOT be screened out. Private, cannot connect,
 * inactive, and dead wikis are screened out simply by not being listed here. The empty
 * string is listed so that wikis with no status are included.
 */
$goodStatuses = array(
    'vibrant', 'active', 'new', 'in preparation',
    'dormant', 'needslove', 'spammed', 'goalreached',
    '',
);

/*
 * Names of the serialized data fields pulled from WikiIndex; presently, only the URL and
 * name are used.
 */
$fields = array(
    'wikiindex page title', 'name', 'URL', 'logo', 'wide logo',
    'iw_url', 'recentchanges URL', 'wikinode URL', 'status', 'language',
    'editmode', 'engine', 'license', 'maintopic', 'backupurl',
    'backupdate', 'pages', 'statistics URL', 'wikiFactor', 'wikiFactor URL',
);
// Known spellings of the "recent changes" page, used to turn a wiki's recent-changes URL
// into an interwiki URL pattern.
//  - A plain entry is a substring that gets str_replace'd with $1.
//  - A 'search' => 'replacement' entry is for engines whose URL needs something other than
//    a bare $1 in place of the matched substring.
// Only plain substring replacement is used. Some wiki software probably has URL conventions
// that would need a regex instead.
// The order of the entries matters to the code that walks this list, and a few entries are
// listed more than once; the duplicates are harmless.
$RCPossibilities = array(
'Посебно:СкорашњеИзмене',
'Посебно:Скорашње_измене',
'Sipesol:Nupela senis',
'Extra:Neuste_Änderunge',
'Extra:Änderunge',
'Spèciâl:Dèrriérs_changements',
'Spèciâl:DèrriérsChangements',
'Speçiale:Ûrtime modiffiche',
'Sapaq:NaqhaHukchasqa',
'Wiki:Koartlyn feroare',
'Wiki:Koarts feroare',
'Spezial:Letzte_Änderungen',
'Spécial:Modifications_récentes',
'Spécial:Modifications_recentes',
'Spécial:ModificationsRécentes',
'Spécial:ModificationsRecentes',
'Especial:Zaguers_cambeos',
'Especial:Cambeos_recients',
'Spesiaal:Onlangse_wysigings',
'Spesiaal:Onlangsewysigings',
'Specialnje:Aktualne_změny',
'พิเศษ:ปรับปรุงล่าสุด',
'Especial:TrocamientosFreskos',
'Ippiziari:UlthimiMudìfigghi',
'Spezial:Toletzt ännert',
'Spezial:Neeste Ännern',
'Spezial:Toletzt ännert',
'Spezial:Neeste Ännern',
'විශේෂ:මෑත_වෙනස්වීම්',
'Xüsusi:SonDəyişikliklər',
'特殊:最近更改',
'Arbednek:Chanjyow_a-dhiwedhes',
'Служебная:Свежие_правки',
'Schbädsjaal:Lädsdâ_Änârungâ',
'Special:Modificationes_recente',
'Serstakt:Seinastu broytingar',
'Specialus:Naujausi_keitimai',
'Spesial:Siste_endringar',
'特殊:最近更改',
'Арнайы:Жуықтағы_өзгерістер',
'Spesial:Siste_endringer',
'Спэцыяльныя:Апошнія_зьмены',
'ارنايى:جۋىقتاعى_وزگەرىستەر',
'ବିଶେଷ:ନଗଦବଦଳ',
'Speciale:NdryshimeSëFundmi',
'Special:Nov_changes',
'Specialis:Nuper mutata',
'Specialis:Mutationes recentes',
'Speciális:Friss_változtatások',
'Speciaal:Lètste_verangeringe',
'Špeciálne:PoslednéÚpravy',
'Башка:УлхкомбаньПолафнематне',
'বিশেষ:শেহতীয়া_সালসলনি',
'Цастәи:АрҽеираҾыцқәа',
'Erenoamáš:Varas_rievdadusat',
'Pàtàkì:ÀwọnÀtúnṣeTuntun',
'Aptaca:NoeltafBetakseem',
'Especial:Cambios_recentes',
'Maalum:MabadalikoyaKaribuni',
'Сæрмагонд:ФæстагИвдтытæ',
'Specjalna:Ostatnie_zmiany',
'Specjalna:OZ',
'ހާއްޞަ:އެންމެ ފަހުގެ ބަދަލްތައް',
'Istimiwa:Paubahan_pahanyarnya',
'Posebno:Nedavne_promjene',
'Özel:SonDeğişiklikler',
'Kerfissíða:Nýlegar_breytingar',
'Speċjali:TibdilRiċenti',
'പ്രത്യേകം:സമീപകാലമാറ്റങ്ങൾ',
'Speciální:Poslední_změny',
'Speciální:Posledni_zmeny',
'Special:CambiamentRecent',
'Speciâl:UltinsCambiaments',
'Maasus:BitkiDiişikmäklär',
'特別:最近修改',
'विशेष:अलीकडील_बदल',
'ځانګړی:اوسني_بدلونونه',
'Махсус:Соңгы_үзгәртүләр',
'Istimewa:Perubahan_terbaru',
'Istimewa:PerubahanTerbaru',
'Istimewa:RC',
'Istimewa:PT',
'מיוחד:שינויים_אחרונים',
'Speciaal:RecenteWijzigingen',
'ພິເສດ:ການດັດແກ້ຫຼ້າສຸດ',
'Սպասարկող:Վերջինփոփոխությունները',
'Posebno:NedavneIzmjene',
'વિશેષ:તાજાફેરફારો',
'Jagleel:Coppite yu mujj',
'특수:최근바뀜',
'ܕܝܠܢܝܐ:ܫܘܚܠܦ̈ܐ_ܚܕ̈ܬܐ',
'Espesiál:Mudansa_foufoun_sira',
'Специјална:СкорешниПромени',
'Husus:AnyarRobah',
'خاص:اخر_تعديلات',
'Xısusi:VurnayışêPeyêni',
'באַזונדער:לעצטע_ענדערונגען',
'Specialine:TantoižedToižetused',
'Speciale:UltimeModifiche',
'სპეციალური:ბოლოცვლილებები',
'Arnawlı:Aqırg\'ı o\'zgerisler',
'Espesyal:Bag-ongGiusab',
'Spiciali:UltimeModifiche',
'Especial:Mudanças_recentes',
'Especial:Recentes',
'Especial:Mudanças_recentes',
'Espesyal:ChanjmanResan',
'Especial:CambiosRecientes',
'Especial:Cambios_recientes',
'ప్రత్యేక:ఇటీవలిమార్పులు',
'Manokana:Fanovàna_farany',
'विशेषम्:नवीनतम_परिवर्तन',
'ویژه:تغییرات_اخیر',
'خاص:أحدث_التغييرات',
'特別:最近の更新',
'特別:最近更新したページ',
'Đặc_biệt:Thay_đổi_gần_đây',
'Башка тевень:ЧыяконьПолавтомат',
'Toiminnot:Tuoreet_muutokset',
'Arnaýı:Jwıqtağı_özgerister',
'حاص:نوکین تغییرات',
'Posebno:ZadnjeSpremembe',
'Special:Schimbări_recente',
'Spezial:Rezent_Ännerungen',
'Berezi:AzkenAldaketak',
'Xususi:Ән_нујә_дәгишон',
'Taybet:Guherandinên_dawî',
'Posebno:Nedavne_izmjene',
'Specialaĵo:Lastaj_ŝanĝoj',
'Especial:Darrièrs_cambiaments',
'Especial:DarrièrsCambiaments',
'Especial:Darrièras_Modificacions',
'ពិសេស:បំលាស់ប្ដូរថ្មីៗ',
'Ειδικό:ΠρόσφατεςΑλλαγές',
'Eri:Viimased_muudatused',
'Speciel:Seneste_ændringer',
'Papa_nui:NāLoliHou',
'Papa_nui:NaLoliHou',
'Special:最近更改',
'Лӱмын_ыштыме:Пытартыш_тӧрлатымаш-влак',
'Spesyal:BakaseywanKenki',
'Спеціальна:Нові_редагування',
'Natatangi:Mga_huling_binago',
'Natatangi:HulingBinago',
'Специални:Последни_промени',
'Specala:RecentaChanji',
'تایبەت:دوایین_گۆڕانکارییەکان',
'Especial:Canvis_recents',
'Khas:Perubahan_terkini',
'Special:Senaste_ändringar',
'Шпеціална:Послїднї_зміны',
'Speciale:ÙltimiCanbiamenti',
'Spesiaal:Leste_wiezigingen',
'Kusuih:Neuubah_baro',
'Specialne:Aktualne_změny',
'خاص:تازيون تبديليون',
'Special:RecentChanges',
'Patikos:Votükamsnulik',
'Dibar:KemmoùDiwezhañ',
// Non-MediaWiki engines follow; the trailing comment on each line names the engine
'do=recent' => 'id=$1', // AwkiAwki
'index.php?page=RecentChanges' => 'index.php?page=$1', // Bitweaver
#'_Recent', // EditMe; disabled because of conflicts with SeedWiki
'AllRecentChanges', // PmWiki
'WebChanges', // TKWiki
'.cgi?RecentChanges' => '.cgi?$1', // UseModWiki
'space/changes' => '$1', // Wikispaces
);
// Some wikis keep the English "Special:" namespace but use a foreign-language page name for
// RecentChanges. For every known pattern containing a colon, also register a variant made
// of "Special:" plus whatever sits between the first and second colon.
$englishNamespaceVariants = array();
foreach ( $RCPossibilities as $knownPattern ) {
    $colonPieces = explode( ':', $knownPattern );
    if ( !isset( $colonPieces[1] ) ) {
        // No colon in this pattern, so there is no namespace to swap
        continue;
    }
    $englishNamespaceVariants[] = 'Special:' . $colonPieces[1];
}
// Append the collected variants after all of the existing entries, in the order found
foreach ( $englishNamespaceVariants as $englishNamespaceVariant ) {
    $RCPossibilities[] = $englishNamespaceVariant;
}
// Retrieve the interwiki map from Meta-Wiki through the MediaWiki API.
// On success this leaves behind:
//   $apiPull   - list of interwiki entries, each an array with at least 'prefix' and 'url'
//   $apiResult - the same data as a prefix => url map
// Any failure (download, JSON decoding, unexpected response shape) aborts the script.
$wgInterwikiMapUserAgent =
    'User-Agent: LeucosticteBot (http://mediawiki.org/wiki/User:LeucosticteBot)';
$opts = array(
    'http' => array(
        'method' => "GET",
        'header' => $wgInterwikiMapUserAgent
    )
);
$wgInterwikiMapApiArgs = '?action=query&meta=siteinfo&siprop=interwikimap&format=json';
// Meta-Wiki's canonical host is meta.wikimedia.org; requesting it directly avoids depending
// on a redirect from the legacy meta.wikipedia.org host name.
$url = 'https://meta.wikimedia.org/w/api.php' . $wgInterwikiMapApiArgs;
$streamContext = stream_context_create( $opts );
$contents = file_get_contents( $url, false, $streamContext );
if ( $contents === false || $contents === '' ) {
    die( "Retrieval from $url failed\n" );
}
$decodedResponse = json_decode( $contents, true );
if ( !is_array( $decodedResponse ) ) {
    die( "json decode of $url failed\n" );
}
// An API error response is valid JSON but has no interwiki map in it
if ( !isset( $decodedResponse['query']['interwikimap'] )
    || !is_array( $decodedResponse['query']['interwikimap'] )
) {
    die( "Response from $url did not contain an interwiki map\n" );
}
$apiPull = array();
$apiResult = array();
foreach ( $decodedResponse['query']['interwikimap'] as $apiPullElement ) {
    // Keep only well-formed entries so later code can read both keys without checking
    if ( !isset( $apiPullElement['prefix'], $apiPullElement['url'] ) ) {
        continue;
    }
    $apiPull[] = $apiPullElement;
    $apiResult[$apiPullElement['prefix']] = $apiPullElement['url'];
}
/**
 * Turn a wiki's recent-changes URL into an interwiki URL pattern.
 *
 * The URL and every search string are passed through strtolower() before comparison.
 * An integer-keyed pattern is replaced with '$1'; for a string-keyed pattern the key is the
 * search string and the value is the replacement. A replacement is applied only when the
 * search string actually occurs in the URL, and each one starts again from the original URL,
 * so when several patterns occur the last one in the list decides the result.
 *
 * @param string $recentChangesUrl The wiki's recent-changes URL
 * @param array $RCPossibilities Pattern list (plain strings and 'search' => 'replacement')
 * @return string The lowercased URL with the matched part replaced, or '' if nothing matched
 */
function pmwRecentChangesToInterwikiUrl( $recentChangesUrl, array $RCPossibilities ) {
    $recentChangesUrl = strtolower( $recentChangesUrl );
    $interwikiUrl = '';
    foreach ( $RCPossibilities as $key => $RCPossibility ) {
        if ( is_int( $key ) ) {
            $search = strtolower( $RCPossibility );
            $replacement = '$1';
        } else {
            // The URL has been lowercased, so the key must be lowercased too or it can
            // never be found
            $search = strtolower( $key );
            $replacement = strtolower( $RCPossibility );
        }
        // Compare against false explicitly: strpos() returns 0 for a match at the start
        if ( $search !== '' && strpos( $recentChangesUrl, $search ) !== false ) {
            $interwikiUrl = str_replace( $search, $replacement, $recentChangesUrl );
        }
    }
    return $interwikiUrl;
}

/**
 * Convert a wiki name (or WikiIndex page title) into an interwiki prefix.
 *
 * Forbidden characters are stripped and the name is lowercased; "wiki" is appended unless
 * the result already contains one of the good prefixes anywhere, including at the start.
 *
 * @param string $name Wiki name or page title
 * @param array $forbiddenChars Characters to strip from the name
 * @param array $goodPrefixes Substrings that make appending "wiki" unnecessary
 * @return string The generated prefix
 */
function pmwNameToPrefix( $name, array $forbiddenChars, array $goodPrefixes ) {
    $name = strtolower( str_replace( $forbiddenChars, '', $name ) );
    foreach ( $goodPrefixes as $goodPrefix ) {
        if ( strpos( $name, $goodPrefix ) !== false ) {
            return $name;
        }
    }
    return $name . 'wiki';
}

$wikitable = '{{MediaWiki:interwikimapbackup-desc}}'
    . "\n\n==Current interwiki map==\n\n" . '{| class="plainlinks"' . "\n";
// Gather stored data from database table
$res = $db->query( "SELECT * FROM parsed_mirrored_wikiindex" );
if ( !$res ) {
    die( 'Query failed: ' . $db->error . "\n" );
}
while ( $row = $res->fetch_assoc() ) {
    // NOTE(review): unserialize() must only ever see trusted data; this relies on pmw_data
    // having been written by the companion bot - confirm before pointing at another source.
    $unserialized = unserialize( $row['pmw_data'] );
    // Rows that fail to unserialize or carry no recent-changes URL cannot produce an entry
    if ( !is_array( $unserialized )
        || !isset( $unserialized['recentchanges URL'] )
        || !is_string( $unserialized['recentchanges URL'] )
    ) {
        continue;
    }
    $unserialized['wikiindex page title'] = $row['pmw_wikiindex_page_title'];

    // Get the RC URL and convert it to the iw_url
    $iwUrl = pmwRecentChangesToInterwikiUrl( $unserialized['recentchanges URL'],
        $RCPossibilities );
    if ( $iwUrl === '' ) {
        // No known pattern matched, so there is no $1 placeholder to build an entry from
        continue;
    }

    $prefix = '';
    if ( isset( $unserialized['iw_prefix'] ) && is_string( $unserialized['iw_prefix'] ) ) {
        $prefix = $unserialized['iw_prefix'];
    }
    $status = '';
    if ( isset( $unserialized['status'] ) && is_string( $unserialized['status'] ) ) {
        $status = strtolower( $unserialized['status'] );
    }

    // If it's in meta-wiki's map, use that prefix. $iwUrl is lowercase, so the comparison
    // has to ignore case or map URLs containing capitals could never match.
    foreach ( $apiPull as $apiPullElement ) {
        if ( strcasecmp( $iwUrl, $apiPullElement['url'] ) === 0 ) {
            $prefix = $apiPullElement['prefix'];
        }
    }

    // If it's not in meta-wiki's map, but has an active status, then convert the wiki name
    // or wikiindex page title to a prefix
    if ( $prefix === '' && in_array( $status, $goodStatuses, true ) ) {
        if ( isset( $unserialized['name'] ) ) {
            $name = $unserialized['name'];
        } else {
            $name = $unserialized['wikiindex page title'];
        }
        $prefix = pmwNameToPrefix( $name, $forbiddenChars, $goodPrefixes );
    }

    // The blacklist is checked against the final prefix; checking before the prefix has been
    // generated would never block anything
    if ( $prefix !== '' && !in_array( $prefix, $blacklist, true ) ) {
        $wikitable .= '| ' . $prefix . ' || ' . $iwUrl
            . "\n" . '|-' . "\n";
    }
}
$res->free();
$wikitable .= "|}\n";
// Write the finished wikitable to PMWOutput.txt in the current working directory,
// replacing any previous output. Abort with a message if the file cannot be written,
// instead of passing a failed handle on to further file functions.
$outputFile = 'PMWOutput.txt';
if ( file_put_contents( $outputFile, $wikitable ) === false ) {
    die( "Could not write $outputFile\n" );
}