Manual:Chris G's botclasses/DownloadAllImagesBot.php
Appearance
This bot uses Chris G's botclasses to download all images from a wiki.
<?php
/* DownloadAllImagesBot.php
* By Leucosticte, https://www.mediawiki.org/wiki/User:Leucosticte
* GNU Public License 2.0
*
* This bot downloads all images from a wiki.
*/
/* Load Chris G's botclasses and point the client at the target wiki's API. */
// require_once (rather than include) stops right here with a clear error if
// botclasses.php is missing, instead of failing later on "new wikipedia".
require_once 'botclasses.php';
$wiki = new wikipedia;
// HTTPS, so that the credentials passed to login() are not sent in cleartext.
$wiki->url = "https://en.wikipedia.org/w/api.php";

/* All the login stuff. */
$user = 'REMOVED';
$pass = 'REMOVED';
$wiki->login( $user, $pass );

/* Directory into which the downloaded files are saved. */
$dir = "./downloadfiles";
// Create the directory if it doesn't exist
if ( !file_exists( $dir ) ) {
    echo "Creating directory $dir...\n";
    // Recursive, so a nested $dir works too. The is_dir() re-check tolerates
    // another process having created the directory in the meantime.
    if ( !mkdir( $dir, 0777, true ) && !is_dir( $dir ) ) {
        die( "Could not create directory $dir\n" );
    }
}
// Something exists at $dir, but it might be a regular file
if ( !is_dir( $dir ) ) {
    die( "$dir is not a directory\n" );
}
/*
 * Main loop: page through the wiki's list of images (API module
 * list=allimages), 500 titles at a time, and download every file that is not
 * already present in $dir.
 *
 * Expects $wiki (a logged-in botclasses client) and $dir (an existing
 * directory) to have been set up earlier in the script.
 */

// Initialize a single cURL session; it is reused for every download.
$ch = curl_init();
if ( $ch === false ) {
    die( "Could not initialize cURL\n" );
}
curl_setopt( $ch, CURLOPT_HEADER, 0 );
// Return the body from curl_exec() instead of printing it
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
// Follow redirects so the redirect page itself is never saved as the image
curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
// Treat HTTP status >= 400 as a failure rather than saving the error page
curl_setopt( $ch, CURLOPT_FAILONERROR, true );

// Continuation parameters handed back by the API; empty for the first request.
$continue = array();
// $done = false means that there still are more images left to come
$done = false;

// Keep going until it's evident that there are no more images
while ( !$done ) {
    // Put the result in PHP format; get 500 image titles at a time; get the
    // urls for the images; sort the list in ascending order.
    $query = "?action=query&format=php&list=allimages&ailimit=500&aiprop=url&aidir=ascending";
    if ( $continue ) {
        // http_build_query() URL-encodes the values; titles may contain
        // spaces, "&", "+" and other characters that would corrupt the query.
        $query .= '&' . http_build_query( $continue, '', '&' );
    }

    // Get the result of the API query.
    $ret = $wiki->query( $query );
    if ( !is_array( $ret ) || isset( $ret['error'] ) ) {
        $info = isset( $ret['error']['info'] ) ? $ret['error']['info'] : 'no usable response';
        die( "API query failed: $info\n" );
    }

    // Work out where the next request has to pick up. Newer MediaWiki returns
    // a top-level "continue" array whose entries must all be sent back; older
    // versions return "query-continue" keyed by module name. If neither is
    // present, this was the last batch.
    if ( isset( $ret['continue'] ) && is_array( $ret['continue'] ) ) {
        $continue = $ret['continue'];
    } elseif ( isset( $ret['query-continue']['allimages'] )
        && is_array( $ret['query-continue']['allimages'] )
    ) {
        $continue = $ret['query-continue']['allimages'];
    } else {
        $done = true;
    }

    $images = array();
    if ( isset( $ret['query']['allimages'] ) && is_array( $ret['query']['allimages'] ) ) {
        $images = $ret['query']['allimages'];
    }

    // Loop through this batch of image urls and download them all
    foreach ( $images as $element ) {
        if ( !isset( $element['name'], $element['url'] ) ) {
            continue;
        }

        // Save images in the directory. basename() keeps a name containing
        // path separators from escaping $dir.
        $filename = "$dir/" . basename( $element['name'] );

        // If the file already exists, don't save it again
        if ( file_exists( $filename ) ) {
            continue;
        }

        // Download first and only touch the disk on success, so a failed
        // transfer cannot leave an empty file that later runs would skip.
        curl_setopt( $ch, CURLOPT_URL, $element['url'] );
        $data = curl_exec( $ch );
        if ( $data === false ) {
            echo "Failed to download {$element['url']}: " . curl_error( $ch ) . "\n";
            continue;
        }

        // Write to a temporary name and rename into place, so an interrupted
        // write never leaves a truncated file under the final name.
        $partial = "$filename.part";
        if ( file_put_contents( $partial, $data ) === false || !rename( $partial, $filename ) ) {
            echo "Failed to write $filename\n";
            if ( file_exists( $partial ) ) {
                unlink( $partial );
            }
        }
    }
}