Jump to content

Canonical interwiki prefixes/PMWTableToWikiTable.php

From mediawiki.org

This script takes the PMWTable generated by Chris G's botclasses/ParseMirroredWikiIndexBot.php and converts it to a wikitable. You can then put that in your MediaWiki:Interwiki-whitelist for use by InterwikiMap.

<?php
/**
 * PMWTableToWikiTable.php
 * By Leucosticte < https://www.mediawiki.org/wiki/User:Leucosticte >
 * This script takes the PMWTable generated by ParseMirroredWikiIndex and converts it to a wiki
 * table. You can then put that in your MediaWiki:Interwiki-whitelist for use by InterwikiMap.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */


// A generated prefix that contains none of these substrings gets "wiki" appended to it
$goodPrefixes = array( 'wiki', 'pedia' );

// Characters removed from a wiki's name while its prefix is being built
$forbiddenChars = array( ' ', ':', '&', '=' );

// Prefixes to leave out, because the spam blacklist would reject them
$blacklist = array( 'biosites.orgwiki' );

// Database connection settings
// NOTE(review): the credentials are hard-coded; consider reading them from the environment or
// from a configuration file that is kept out of version control.
$host = 'localhost';
$dbUser = 'root';
$dbPass = 'REMOVED';
$dbName = 'parse_mirrored_wikiindex_bot';
// Table name => SQL schema file; not referenced by the rest of the visible script
$tables = array(
    'parsed_mirrored_wikiindex' => 'parsed-mirrored-wikiindex.sql',
);

// Open the connection. "new mysqli" always returns an object, so a failed connection has to be
// detected through connect_errno (older PHP) or through the mysqli_sql_exception that newer PHP
// versions throw by default.
try {
      $db = new mysqli( $host, $dbUser, $dbPass, $dbName );
      if ( $db->connect_errno ) {
            die( 'Could not connect: ' . $db->connect_error . "\n" );
      }
      if ( !$db->select_db ( $dbName ) ) {
            die( 'Could not select database ' . $dbName . ': ' . $db->error . "\n" );
      }
} catch ( mysqli_sql_exception $e ) {
      die( 'Could not connect: ' . $e->getMessage() . "\n" );
}

// Statuses (always written in lowercase) of the wikis that are kept. Wikis whose status is
// anything else (private, cannot connect, inactive, dead) are screened out.
$goodStatuses = array(
      'vibrant', 'active', 'new', 'in preparation',
      'dormant', 'needslove', 'spammed', 'goalreached',
      '', // a wiki without any status is kept as well
);

// Names of the serialized data fields pulled from WikiIndex; at present only the URL and the
// name are used
$fields = array(
      'wikiindex page title', 'name', 'URL',
      'logo', 'wide logo',
      'iw_url', 'recentchanges URL', 'wikinode URL',
      'status', 'language', 'editmode', 'engine', 'license', 'maintopic',
      'backupurl', 'backupdate',
      'pages', 'statistics URL', 'wikiFactor', 'wikiFactor URL',
);

// Page titles and URL fragments that identify a wiki's recent-changes page. Each wiki's
// "recentchanges URL" is searched for them in order to derive its interwiki URL:
//   - a plain string entry (integer key) is replaced with $1;
//   - a 'search' => 'replacement' entry has the search text swapped for the replacement; this
//     form is used for engines whose URL needs more than the page title replaced.
// Matching relies on plain string functions, not regular expressions, so URL conventions that
// would need a pattern are not covered. A few titles are listed twice; the duplicates are
// harmless.
$RCPossibilities = array(
	'Посебно:СкорашњеИзмене',
	'Посебно:Скорашње_измене',
	'Sipesol:Nupela senis',
	'Extra:Neuste_Änderunge',
	'Extra:Änderunge',
	'Spèciâl:Dèrriérs_changements',
	'Spèciâl:DèrriérsChangements',
	'Speçiale:Ûrtime modiffiche',
	'Sapaq:NaqhaHukchasqa',
	'Wiki:Koartlyn feroare',
	'Wiki:Koarts feroare',
	'Spezial:Letzte_Änderungen',
	'Spécial:Modifications_récentes',
	'Spécial:Modifications_recentes',
	'Spécial:ModificationsRécentes',
	'Spécial:ModificationsRecentes',
	'Especial:Zaguers_cambeos',
	'Especial:Cambeos_recients',
	'Spesiaal:Onlangse_wysigings',
	'Spesiaal:Onlangsewysigings',
	'Specialnje:Aktualne_změny',
	'พิเศษ:ปรับปรุงล่าสุด',
	'Especial:TrocamientosFreskos',
	'Ippiziari:UlthimiMudìfigghi',
	'Spezial:Toletzt ännert',
	'Spezial:Neeste Ännern',
	'Spezial:Toletzt ännert',
	'Spezial:Neeste Ännern',
	'විශේෂ:මෑත_වෙනස්වීම්',
	'Xüsusi:SonDəyişikliklər',
	'特殊:最近更改',
	'Arbednek:Chanjyow_a-dhiwedhes',
	'Служебная:Свежие_правки',
	'Schbädsjaal:Lädsdâ_Änârungâ',
	'Special:Modificationes_recente',
	'Serstakt:Seinastu broytingar',
	'Specialus:Naujausi_keitimai',
	'Spesial:Siste_endringar',
	'特殊:最近更改',
	'Арнайы:Жуықтағы_өзгерістер',
	'Spesial:Siste_endringer',
	'Спэцыяльныя:Апошнія_зьмены',
	'ارنايى:جۋىقتاعى_وزگەرىستەر',
	'ବିଶେଷ:ନଗଦବଦଳ',
	'Speciale:NdryshimeSëFundmi',
	'Special:Nov_changes',
	'Specialis:Nuper mutata',
	'Specialis:Mutationes recentes',
	'Speciális:Friss_változtatások',
	'Speciaal:Lètste_verangeringe',
	'Špeciálne:PoslednéÚpravy',
	'Башка:УлхкомбаньПолафнематне',
	'বিশেষ:শেহতীয়া_সালসলনি',
	'Цастәи:АрҽеираҾыцқәа',
	'Erenoamáš:Varas_rievdadusat',
	'Pàtàkì:ÀwọnÀtúnṣeTuntun',
	'Aptaca:NoeltafBetakseem',
	'Especial:Cambios_recentes',
	'Maalum:MabadalikoyaKaribuni',
	'Сæрмагонд:ФæстагИвдтытæ',
	'Specjalna:Ostatnie_zmiany',
	'Specjalna:OZ',
	'ހާއްޞަ:އެންމެ ފަހުގެ ބަދަލްތައް',
	'Istimiwa:Paubahan_pahanyarnya',
	'Posebno:Nedavne_promjene',
	'Özel:SonDeğişiklikler',
	'Kerfissíða:Nýlegar_breytingar',
	'Speċjali:TibdilRiċenti',
	'പ്രത്യേകം:സമീപകാലമാറ്റങ്ങൾ',
	'Speciální:Poslední_změny',
	'Speciální:Posledni_zmeny',
	'Special:CambiamentRecent',
	'Speciâl:UltinsCambiaments',
	'Maasus:BitkiDiişikmäklär',
	'特別:最近修改',
	'विशेष:अलीकडील_बदल',
	'ځانګړی:اوسني_بدلونونه',
	'Махсус:Соңгы_үзгәртүләр',
	'Istimewa:Perubahan_terbaru',
	'Istimewa:PerubahanTerbaru',
	'Istimewa:RC',
	'Istimewa:PT',
	'מיוחד:שינויים_אחרונים',
	'Speciaal:RecenteWijzigingen',
	'ພິເສດ:ການດັດແກ້ຫຼ້າສຸດ',
	'Սպասարկող:Վերջինփոփոխությունները',
	'Posebno:NedavneIzmjene',
	'વિશેષ:તાજાફેરફારો',
	'Jagleel:Coppite yu mujj',
	'특수:최근바뀜',
	'ܕܝܠܢܝܐ:ܫܘܚܠܦ̈ܐ_ܚܕ̈ܬܐ',
	'Espesiál:Mudansa_foufoun_sira',
	'Специјална:СкорешниПромени',
	'Husus:AnyarRobah',
	'خاص:اخر_تعديلات',
	'Xısusi:VurnayışêPeyêni',
	'באַזונדער:לעצטע_ענדערונגען',
	'Specialine:TantoižedToižetused',
	'Speciale:UltimeModifiche',
	'სპეციალური:ბოლოცვლილებები',
	'Arnawlı:Aqırg\'ı o\'zgerisler',
	'Espesyal:Bag-ongGiusab',
	'Spiciali:UltimeModifiche',
	'Especial:Mudanças_recentes',
	'Especial:Recentes',
	'Especial:Mudanças_recentes',
	'Espesyal:ChanjmanResan',
	'Especial:CambiosRecientes',
	'Especial:Cambios_recientes',
	'ప్రత్యేక:ఇటీవలిమార్పులు',
	'Manokana:Fanovàna_farany',
	'विशेषम्:नवीनतम_परिवर्तन',
	'ویژه:تغییرات_اخیر',
	'خاص:أحدث_التغييرات',
	'特別:最近の更新',
	'特別:最近更新したページ',
	'Đặc_biệt:Thay_đổi_gần_đây',
	'Башка тевень:ЧыяконьПолавтомат',
	'Toiminnot:Tuoreet_muutokset',
	'Arnaýı:Jwıqtağı_özgerister',
	'حاص:نوکین تغییرات',
	'Posebno:ZadnjeSpremembe',
	'Special:Schimbări_recente',
	'Spezial:Rezent_Ännerungen',
	'Berezi:AzkenAldaketak',
	'Xususi:Ән_нујә_дәгишон',
	'Taybet:Guherandinên_dawî',
	'Posebno:Nedavne_izmjene',
	'Specialaĵo:Lastaj_ŝanĝoj',
	'Especial:Darrièrs_cambiaments',
	'Especial:DarrièrsCambiaments',
	'Especial:Darrièras_Modificacions',
	'ពិសេស:បំលាស់ប្ដូរថ្មីៗ',
	'Ειδικό:ΠρόσφατεςΑλλαγές',
	'Eri:Viimased_muudatused',
	'Speciel:Seneste_ændringer',
	'Papa_nui:NāLoliHou',
	'Papa_nui:NaLoliHou',
	'Special:最近更改',
	'Лӱмын_ыштыме:Пытартыш_тӧрлатымаш-влак',
	'Spesyal:BakaseywanKenki',
	'Спеціальна:Нові_редагування',
	'Natatangi:Mga_huling_binago',
	'Natatangi:HulingBinago',
	'Специални:Последни_промени',
	'Specala:RecentaChanji',
	'تایبەت:دوایین_گۆڕانکارییەکان',
	'Especial:Canvis_recents',
	'Khas:Perubahan_terkini',
	'Special:Senaste_ändringar',
	'Шпеціална:Послїднї_зміны',
	'Speciale:ÙltimiCanbiamenti',
	'Spesiaal:Leste_wiezigingen',
	'Kusuih:Neuubah_baro',
	'Specialne:Aktualne_změny',
	'خاص:تازيون تبديليون',
	'Special:RecentChanges',
	'Patikos:Votükamsnulik',
	'Dibar:KemmoùDiwezhañ',
	'do=recent' => 'id=$1', // AwkiAwki
	'index.php?page=RecentChanges' => 'index.php?page=$1', // Bitweaver
	#'_Recent', // EditMe; disabled because of conflicts with SeedWiki
	'AllRecentChanges', // PmWiki
	'WebChanges', // TKWiki
        '.cgi?RecentChanges' => '.cgi?$1', // UseModWiki
	'space/changes' => '$1', // Wikispaces
);
// Handle wikis that combine the English "Special:" namespace with a foreign-language page
// name: for every entry containing a colon, also register "Special:" followed by the text
// after that colon.
$englishNamespaceVariants = array();
foreach ( $RCPossibilities as $localizedTitle ) {
      $pieces = explode ( ':', $localizedTitle );
      if ( count ( $pieces ) < 2 ) {
            // No namespace separator in this entry, so there is nothing to derive
            continue;
      }
      $englishNamespaceVariants[] = 'Special:' . $pieces[1];
}
// Append the derived variants after the original entries, in the order they were found
foreach ( $englishNamespaceVariants as $englishNamespaceVariant ) {
      $RCPossibilities[] = $englishNamespaceVariant;
}

// Retrieve the interwiki map through the MediaWiki API (siteinfo / interwikimap, as JSON)
$wgInterwikiMapUserAgent =
    'User-Agent: LeucosticteBot (http://mediawiki.org/wiki/User:LeucosticteBot)';
$opts = array(
        'http'=>array(
                'method' => "GET",
                'header' => $wgInterwikiMapUserAgent
        )
);
$wgInterwikiMapApiArgs = '?action=query&meta=siteinfo&siprop=interwikimap&format=json';
$url = 'https://meta.wikipedia.org/w/api.php';
$url .= $wgInterwikiMapApiArgs;
$streamContext = stream_context_create( $opts );
$contents = file_get_contents ( $url, false, $streamContext );
if ( !$contents ) {
        die ( "Retrieval from $url failed\n" );
}
$apiPull = json_decode ( $contents, true );
if ( !is_array ( $apiPull ) ) {
        die( "json decode of $url failed\n" );
}
// An API error response is valid JSON but carries no query/interwikimap section; stop here
// instead of iterating over a missing key further down
if ( !isset ( $apiPull['query']['interwikimap'] )
        || !is_array ( $apiPull['query']['interwikimap'] ) ) {
        die( "Response from $url contains no interwiki map\n" );
}
// From here on $apiPull is the list of interwiki entries
$apiPull = $apiPull['query']['interwikimap'];
// Lookup table: prefix => url
$apiResult = array();
foreach ( $apiPull as $apiPullElement ) {
        // Ignore malformed entries that lack either field
        if ( isset ( $apiPullElement["prefix"], $apiPullElement["url"] ) ) {
                $apiResult[$apiPullElement["prefix"]] = $apiPullElement["url"];
        }
}

// Start of the generated page: a description template, a heading and the opening of the table
$wikitable = '{{MediaWiki:interwikimapbackup-desc}}'
        . "\n\n==Current interwiki map==\n\n" . '{| class="plainlinks"' . "\n";

// Gather stored data from database table
$res = $db->query ( "SELECT * FROM parsed_mirrored_wikiindex" );
if ( !$res ) {
      die ( 'Query failed: ' . $db->error . "\n" );
}
while ( $row = $res->fetch_assoc() ) {
      // NOTE(review): pmw_data presumably originates from pages scraped off WikiIndex.
      // unserialize() on data that is not fully trusted can instantiate arbitrary classes;
      // consider storing JSON instead.
      $unserialized = unserialize ( $row['pmw_data'] );
      if ( !is_array ( $unserialized ) ) {
            // Corrupt or empty record: nothing usable in it
            continue;
      }
      $unserialized['wikiindex page title'] = $row['pmw_wikiindex_page_title'];

      // Get the RC URL and convert it to the iw_url
      $unserialized['iw_url'] = '';
      if ( !isset ( $unserialized['recentchanges URL'] )
            || !is_string ( $unserialized['recentchanges URL'] ) ) {
            continue;
      }
      $rcUrl = $unserialized['recentchanges URL'];
      foreach ( $RCPossibilities as $key => $RCPossibility ) {
            if ( is_int ( $key ) ) {
                  // Plain entry: the recent-changes page title itself becomes $1
                  $search = $RCPossibility;
                  $replacement = '$1';
            } else {
                  // 'search' => 'replacement' entry
                  $search = $key;
                  $replacement = $RCPossibility;
            }
            // Compare against false explicitly (a match at offset 0 is still a match), and
            // replace only when the pattern is really present so that a non-matching entry
            // cannot overwrite the result of an earlier match. The case-insensitive functions
            // leave the rest of the URL in its original case, because URL paths may be
            // case-sensitive. When several entries match, the last one wins.
            if ( stripos ( $rcUrl, $search ) !== false ) {
                  $unserialized['iw_url'] = str_ireplace ( $search, $replacement, $rcUrl );
            }
      }
      if ( $unserialized['iw_url'] === '' ) {
            // No known recent-changes pattern was found, so no interwiki URL can be derived
            // and the wiki is left out of the table
            continue;
      }

      if ( !isset ( $unserialized['iw_prefix'] ) ) {
            $unserialized['iw_prefix'] = '';
      }
      if ( !isset ( $unserialized['status'] ) ) {
            $unserialized['status'] = '';
      }

      // If it's in meta-wiki's map, use that prefix
      foreach ( $apiPull as $apiPullElement ) {
            if ( isset ( $apiPullElement['url'], $apiPullElement['prefix'] )
                  && $unserialized['iw_url'] === $apiPullElement['url'] ) {
                  $unserialized['iw_prefix'] = $apiPullElement['prefix'];
            }
      }

      // If it's not in meta-wiki's map, but has an active status, then convert the wiki name
      // or wikiindex page title to a prefix
      if ( !$unserialized['iw_prefix']
            && in_array ( strtolower ( $unserialized['status'] ), $goodStatuses, true ) ) {
            if ( isset ( $unserialized['name'] ) && $unserialized['name'] !== '' ) {
                  $name = $unserialized['name'];
            } else {
                  $name = $unserialized['wikiindex page title'];
            }
            $name = strtolower ( str_replace ( $forbiddenChars, '', $name ) );

            // Append "wiki" unless the name already contains one of the approved substrings
            // (anywhere in the name, including at its very start)
            $approvedPrefix = false;
            foreach ( $goodPrefixes as $goodPrefix ) {
                  if ( strpos ( $name, $goodPrefix ) !== false ) {
                        $approvedPrefix = true;
                        break;
                  }
            }
            if ( !$approvedPrefix ) {
                  $name .= 'wiki';
            }

            // The blacklist holds finished prefixes, so it is checked against the generated one
            if ( !in_array ( $name, $blacklist, true ) ) {
                  $unserialized['iw_prefix'] = $name;
            }
      }

      // One table row per wiki: prefix and interwiki URL
      if ( $unserialized['iw_prefix'] ) {
            $wikitable .= '| ' . $unserialized['iw_prefix'] . ' || ' . $unserialized['iw_url']
                  . "\n" . '|-' . "\n";
      }
}
$res->free();
$wikitable .= "|}\n";

// Write the finished wikitable to disk
$file = fopen ( 'PMWOutput.txt', 'w' );
if ( !$file ) {
      die ( "Could not open PMWOutput.txt for writing\n" );
}
fwrite ( $file, $wikitable );
fclose ( $file );