Jump to content

User:Shadowbot2/Source

From Wikipedia, the free encyclopedia

This page contains a cleaned-up version of the Shadowbot2 source.

#!/usr/bin/perl
use Perlwikipedia;
use HTML::Entities;

#Main flow: log in, build the list of pages transcluded on the Main Page,
#check each one (plus its images and tomorrow's dated pages) for protection,
#e-mail the subscribers if anything is unprotected, then print run statistics.
my $editor=Perlwikipedia->new;

my $starting_time=time; #What time did we start?

my $username='Shadowbot2';
my $password='**********';


$editor->login($username,$password);

#Titles sorted by scan() into the two result buckets
my (@protected,@unprotected);

$editor->purge_page('Main Page'); #Make sure we have a clean copy of the Main Page

my $mailing_list_text=$editor->get_text('User:Shadowbot2/Mailing list');

my @mailing_list=$mailing_list_text=~m/\* \[\[User:(.+?)\|.+?\]\]/g; #Parse for all users listed

#Fetch the rendered Main Page so the login check below has something to look at.
#Previously $main_page_html was never assigned, so the check always failed and
#the bot logged in a second time on every run.
my $main_page_html=$editor->{mech}->get("http://en.wikipedia.org/wiki/Main_Page")->content;

#NOTE(review): assumes the account name only shows up in the HTML while logged in -- confirm
unless ($main_page_html=~m/Shadowbot2/i) { #Make sure we're still logged in
	$editor->login($username,$password);
}

#Work list of page titles shared by the subroutines below. They refer to it as
#the package variable @transclusion_list, so it is declared with "our" (the
#earlier "my @tranclusion_list" was a misspelled, unused lexical).
our @transclusion_list;

load_template_list();

do_main_page_scan();

do_images_scan();

do_tomorrow_templates_scan();

my $sent_email=0;

if (@unprotected) { #Send email if needed and mark as such
	send_mail();
	$sent_email=1;
}

my $ending_time=time; #Get various statistics about the run
my $total_time=$ending_time-$starting_time;
my $total_scanned=(scalar @protected)+(scalar @unprotected);

print "I just scanned $total_scanned pages/images. I detected " . (scalar @protected) ." protected pages and " . (scalar @unprotected) . " unprotected pages. The job took $total_time seconds to run.\n\n";
if($sent_email) {print "I sent an alert email to " . (scalar @mailing_list) . " users.\n\n";}
else {print "I did not send an alert email.\n\n";}

print "Pages I scanned and their protection status:\n\n";
#Page header that write() emits above the first report row
format STDOUT_TOP=
Name of page								Result of scan
--------------------------------------------------------------------------------------
.
#One report row: left-justified page title followed by its scan result.
#Both fields are filled from the package variables $page and $result.
format STDOUT=
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<  @<<<<<<<<<<<<<
$page,							       	        $result
.

#Emit the protected titles first, then the unprotected ones. Each title is
#wrapped in [[...]] in place (the loop variable aliases the list element).
foreach my $bucket ([\@protected,"Protected"],[\@unprotected,"Unprotected"]) {
	my ($titles,$status)=@{$bucket};
	foreach $page (@{$titles}) {
		$page="[[$page]]";
		$result=$status;
		write;
	}
}

#Scan every page on the transclusion list for protection.
#
#Takes no arguments. First appends today's "POTD protected" subpage to the
#global @transclusion_list (its name is built from the globals $CURRENTYEAR,
#$CURRENTMONTH and $CURRENTDAY2), then hands each title to scan(), which
#records the outcome.
sub do_main_page_scan {
	#Save us the hassle of finding the POTD subpage
	push (@transclusion_list,"Template:POTD protected/$CURRENTYEAR-$CURRENTMONTH-$CURRENTDAY2");
	foreach my $title (@transclusion_list) {
		#Skip only titles that actually END in .js or .css. The old unanchored
		#/\.\w{2,3}/ skipped any title with a dot followed by two word
		#characters, so such templates were silently never checked.
		next if $title=~m/\.(?:js|css)\z/i;
		scan($title);
	}
}

#Scan the images used by the pages on the transclusion list.
#
#Takes no arguments. For each title in the global @transclusion_list the
#wikitext is fetched, comments and <noinclude> sections are removed, and every
#[[Image:...]] link is collected. Each distinct image title is then passed to
#scan() exactly once.
sub do_images_scan {
	my @images;
	my %seen; #Image titles already queued, so nothing is scanned (and counted) twice
	foreach my $title (@transclusion_list) {
		my $wikitext=$editor->get_text($title);
		#NOTE(review): get_text may return undef for a missing page -- confirm; skipping is harmless
		next unless defined $wikitext;
		#Get rid of unrendered stuff
		$wikitext=~s/<!--.+?-->//gs;
		$wikitext=~s/<noinclude>.+?<\/noinclude>//gs;
		#Collect ALL image links on the page. <includeonly> tags are left in the
		#text, so images inside them are matched by the same pattern. The name
		#stops at the first "|" or "]", which also covers links without a caption
		#and keeps the match from running into a later link.
		while ($wikitext=~m/\[\[(Image:[^\]\|]+)/g) {
			my $image=$1;
			$image=~s/\s+$//;
			#Names built from template syntax cannot be resolved here
			next if $image=~m/\{/;
			next if $seen{$image}++;
			push (@images,$image);
		}
	}
	foreach my $image (@images) {scan($image);}
}


#Scan tomorrow's version of every dated page on the transclusion list.
#
#Takes no arguments. A title in the global @transclusion_list that ends in
#"Month D" or "Month D, YYYY" is copied, the date is advanced by one day with
#do_date_increment(), and the resulting title is passed to scan(). The list
#itself is left untouched.
sub do_tomorrow_templates_scan {
	#Only real month names count as dates; a bare \w+ also matched titles
	#such as "Foo 12" and produced bogus pages to scan.
	my $month_name=qr/(?:January|February|March|April|May|June|July|August|September|October|November|December)/;
	foreach my $title (@transclusion_list) {
		my $tomorrow=$title; #Work on a copy so the shared list is not rewritten
		if ($title=~m/($month_name) (\d{1,2})$/) { #Check for one date format
			my ($month,$day)=do_date_increment("$1 $2");
			#Skip dates the increment helper could not handle
			next unless $month && $day;
			#Anchored so that only the trailing date is replaced
			$tomorrow=~s/$month_name \d{1,2}$/$month $day/;
		}
		elsif ($title=~m/($month_name) (\d{1,2}), (\d{4})$/) { #The other format
			#The year is now captured; it used to be read from a non-existent $3
			my ($month,$day,$year)=do_date_increment("$1 $2 $3");
			next unless $month && $day && $year;
			$tomorrow=~s/$month_name \d{1,2}, \d{4}$/$month $day, $year/;
		}
		else {
			next; #Not a dated page
		}
		scan($tomorrow);
	}
}

#Check one page for protection and record the result.
#
#Parameter: the page title. The page's edit form is fetched; if its <textarea>
#is marked read-only the title is pushed onto @protected, otherwise onto
#@unprotected. Returns nothing useful.
sub scan {
	my $page=shift;
	#Build a query-safe form of the title: characters such as "&", "#", "+",
	#"?" or "%" would otherwise corrupt the request.
	#NOTE(review): a character string is converted to UTF-8 octets first; a
	#title that is already a byte string is assumed to be UTF-8 -- confirm
	my $title_for_url=$page;
	utf8::encode($title_for_url) if utf8::is_utf8($title_for_url);
	$title_for_url=~tr/ /_/;
	$title_for_url=~s/([^A-Za-z0-9_.~:\/\-])/sprintf('%%%02X',ord($1))/ge;
	#Grab page and decode it
	my $edit_page_contents=$editor->{mech}->get("http://en.wikipedia.org/w/index.php?title=$title_for_url&action=edit")->content;
	decode_entities($edit_page_contents);
	#Remove unrendered content
	$edit_page_contents=~s/<!--.+?-->//gs;
	$edit_page_contents=~s/<noinclude>.+?<\/noinclude>//gs;
	#Replace MediaWiki variables with their real values
	$edit_page_contents=do_replace($edit_page_contents);
	#Check if the text is editable
	if ($edit_page_contents =~ m/<textarea .+? readonly='readonly'/) {
		push(@protected,$page);
	}
	else {
		push(@unprotected,$page);
	}
}

#E-mail the list of unprotected pages to every subscriber.
#
#Takes no arguments. Builds one report from the titles in @unprotected and
#submits it through the Special:Emailuser form for each name in @mailing_list.
sub send_mail {
	my $email_text="This is an automated report of unprotected templates and images I detected during my scan of the Main Page. This list also includes tomorrow's templates, but not tomorrow's images.\n\n Unprotected templates/images I found were:\n\n";
	foreach my $title (@unprotected) { #Write out the unprotected templates
		#Use underscores in the link: a space would cut the URL short in a
		#plain-text mail
		(my $url_title=$title)=~tr/ /_/;
		$email_text.="[[$title]] -- http://en.wikipedia.org/wiki/$url_title\n";
	}
	$email_text.="\nThese templates/images should be protected immediately.";
	foreach my $user (@mailing_list) { #Perform for each user subscribed
		#Same underscore form for user names that contain spaces
		(my $url_user=$user)=~tr/ /_/;
		$editor->{mech}->get("http://en.wikipedia.org/wiki/Special:Emailuser/$url_user");
		#Fill out variables
		$editor->{mech}->field('wpSubject','Shadowbot2 report of unprotected templates');
		$editor->{mech}->field('wpText',$email_text);
		$editor->{mech}->click('wpSend'); #Send the email
	}
}

#Build the global @transclusion_list from the Main Page wikitext.
#
#Takes no arguments. Reads the current date values from
#User:Shadowbot2/Variables into the globals $CURRENTMONTHNAME, $CURRENTDAY,
#$CURRENTYEAR, $CURRENTMONTH and $CURRENTDAY2, substitutes them into the Main
#Page text with do_replace(), and pushes the title of every {{...}}
#transclusion found (plus a few fixed templates) onto @transclusion_list.
sub load_template_list {

	my $main_page_wikitext=$editor->get_text("Main Page");
	my $variable_page_raw=$editor->{mech}->get("http://en.wikipedia.org/wiki/User:Shadowbot2/Variables")->content;
	#Drop comments and <noinclude> sections, which are not rendered
	$main_page_wikitext=~s/<!--.+?-->//gs;
	$main_page_wikitext=~s/<noinclude>.+?<\/noinclude>//gs;
	#Grab the current MediaWiki variable values and mark them as global
	if($variable_page_raw=~m/CURRENTMONTHNAME=(\w+)/) {$CURRENTMONTHNAME=$1;}
	if($variable_page_raw=~m/CURRENTDAY=(\d+)/) {$CURRENTDAY=$1;}
	if($variable_page_raw=~m/CURRENTYEAR=(\d+)/) {$CURRENTYEAR=$1;}
	if($variable_page_raw=~m/CURRENTMONTH=(\d+)/) {$CURRENTMONTH=$1;}
	if($variable_page_raw=~m/CURRENTDAY2=(\d+)/) {$CURRENTDAY2=$1;}
	#Do the replace
	$main_page_wikitext=do_replace($main_page_wikitext);
	my @temp_list=$main_page_wikitext=~m/\{\{(.+?)\}\}/g; #Match text inside of curly brackets
	foreach my $link (@temp_list) {
		#Keep only the page name: anything after the first "|" is a template
		#parameter and is not part of the title
		$link=~s/\|.*$//s;
		$link=~s/^\s+//;
		$link=~s/\s+$//;
		next if $link eq '';
		next if $link=~m/^[A-Z]+$/; #A variable we missed (all uppercase)
		next if $link=~m/^[#\{]/;   #Parser function or {{{parameter}}}, not a page
		#Add the Template: namespace only when the title has no namespace yet.
		#The check is anchored and knows Template: itself, so {{Template:X}}
		#no longer becomes "Template:Template:X".
		unless ($link=~m/^(?:Wikipedia|Template|Portal|User|MediaWiki|Help):/) {
			$link="Template:".$link;
		}
		push (@transclusion_list,$link);
	}
	#Take care of other templates that would be difficult to regex for
	push(@transclusion_list,"Template:TFAfooter");
	push(@transclusion_list,"Template:SelAnnivFooter");
	push(@transclusion_list,"Template:*mp");
	push(@transclusion_list,"Template:Click");

	#Drop duplicates (keeping first occurrences) so no page is scanned and
	#counted twice
	my %seen;
	@transclusion_list=grep { !$seen{$_}++ } @transclusion_list;

}

#Expand the date variables in a piece of text.
#
#Parameter: the text to process. Returns a copy in which {{CURRENTDAY}},
#{{CURRENTYEAR}}, {{CURRENTMONTH}}, {{CURRENTDAY2}} and {{CURRENTMONTHNAME}}
#are replaced by the values of the globals of the same names.
sub do_replace {
    my $text=shift;

    #Variable name paired with its current value, in the order they are applied
    my @substitutions=(
        [ 'CURRENTDAY',       $CURRENTDAY ],
        [ 'CURRENTYEAR',      $CURRENTYEAR ],
        [ 'CURRENTMONTH',     $CURRENTMONTH ],
        [ 'CURRENTDAY2',      $CURRENTDAY2 ],
        [ 'CURRENTMONTHNAME', $CURRENTMONTHNAME ],
    );

    foreach my $entry (@substitutions) {
        my ($variable,$value)=@{$entry};
        $text=~s/\{\{$variable\}\}/$value/g;
    }

    return $text;

}

#Advance a date by one day.
#
#Parameter: a string "Month D" or "Month D YYYY" (single spaces, English month
#name). When the year is missing the current local year is used.
#Returns the list (month name, day, year) of the following day, rolling over
#month and year ends and honouring leap years. Returns an empty list when the
#month name is unknown or the day is not valid for that month.
sub do_date_increment {
	my $date=shift;

	my @month_names=qw(January February March April May June July
			   August September October November December);
	my %month_index;
	@month_index{@month_names}=(0..$#month_names);
	my @days_in_month=(31,28,31,30,31,30,31,31,30,31,30,31);

	return unless defined $date;
	my ($month,$day,$year)=split(/ /,$date);
	return unless defined $month && exists $month_index{$month};
	return unless defined $day && $day=~m/^\d+$/;
	#Default to the current year instead of a hard-coded one
	if (!defined $year || $year eq '') { $year=(localtime)[5]+1900; }

	my $last_day=$days_in_month[$month_index{$month}];
	#February has 29 days in a (Gregorian) leap year
	if ($month eq 'February' && $year%4==0 && ($year%100!=0 || $year%400==0)) {
		$last_day=29;
	}
	return if $day<1 || $day>$last_day;

	#Ordinary case: stay within the month
	if ($day<$last_day) {
		return ($month,$day+1,$year);
	}
	#Last day of the year rolls over into the NEXT year
	if ($month eq 'December') {
		return ('January',1,$year+1);
	}
	#Last day of any other month
	return ($month_names[$month_index{$month}+1],1,$year);
}