Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Wikipedia: Mediawiki-CVS

SVN: [115089] trunk/wikistats/squids

 

 

Wikipedia mediawiki-cvs RSS feed   Index | Next | Previous | View Threaded


ezachte at svn

Apr 30, 2012, 9:52 AM

Post #1 of 1 (37 views)
Permalink
SVN: [115089] trunk/wikistats/squids

https://www.mediawiki.org/wiki/Special:Code/MediaWiki/115089

Revision: 115089
Author: ezachte
Date: 2012-04-30 16:52:33 +0000 (Mon, 30 Apr 2012)
Log Message:
-----------
Some pl and Config.pm files adapted to new folder hierarchy on stat1; Viz* files added (generate input for animation)

Modified Paths:
--------------
trunk/wikistats/squids/SquidCountryScan.pl
trunk/wikistats/squids/SquidCountryScanConfig.pm
trunk/wikistats/squids/SquidLoadScan.pl
trunk/wikistats/squids/SquidReportArchive.pl
trunk/wikistats/squids/SquidReportArchive.sh
trunk/wikistats/squids/SquidReportArchiveConfig.pm

Added Paths:
-----------
trunk/wikistats/squids/SquidTraceUniqueImages.pl
trunk/wikistats/squids/VizCollectEvents.pl
trunk/wikistats/squids/VizCollectEventsMonth.pl
trunk/wikistats/squids/VizPrepJs.pl

Modified: trunk/wikistats/squids/SquidCountryScan.pl
===================================================================
--- trunk/wikistats/squids/SquidCountryScan.pl 2012-04-30 12:39:01 UTC (rev 115088)
+++ trunk/wikistats/squids/SquidCountryScan.pl 2012-04-30 16:52:33 UTC (rev 115089)
@@ -1,3 +1,4 @@
+
#!/usr/bin/perl
## Collect page views stats by country on Locke
## sub CollectRawData -> SquidDataCountries.csv
@@ -23,22 +24,26 @@
# exit ;
}

- $path_root = $job_runs_on_production_server ? $cfg_path_root_production : $cfg_path_root_test ;
+ $path_csv = $job_runs_on_production_server ? $cfg_path_csv : $cfg_path_csv_test ;
+ $path_log = $job_runs_on_production_server ? $cfg_path_log : $cfg_path_log_test ;
+ $file_log = "SquidCountryScan.log" ;

- $file_raw_data_monthly_visits = "$path_root/SquidDataVisitsPerCountryMonthly.csv" ;
- $file_raw_data_daily_visits = "$path_root/SquidDataVisitsPerCountryDaily.csv" ;
+ $file_raw_data_monthly_visits = "$path_csv/SquidDataVisitsPerCountryMonthly.csv" ;
+ $file_raw_data_daily_visits = "$path_csv/SquidDataVisitsPerCountryDaily.csv" ;
$file_per_country_visits = "public/SquidDataCountriesViews.csv" ;
$file_per_country_visits_old = "SquidDataCountries2.csv" ;

- $file_raw_data_monthly_saves = "$path_root/SquidDataSavesPerCountryMonthly.csv" ;
- $file_raw_data_daily_saves = "$path_root/SquidDataSavesPerCountryDaily.csv" ;
+ $file_raw_data_monthly_saves = "$path_csv/SquidDataSavesPerCountryMonthly.csv" ;
+ $file_raw_data_daily_saves = "$path_csv/SquidDataSavesPerCountryDaily.csv" ;
$file_per_country_saves = "public/SquidDataCountriesSaves.csv" ;
$file_per_country_saves_old = "SquidDataCountriesSaves.csv" ;

&CollectRawData ('visits', $file_per_country_visits, $file_per_country_visits_old, $file_raw_data_monthly_visits, $file_raw_data_daily_visits) ;
&CollectRawData ('saves', $file_per_country_saves, $file_per_country_saves_old, $file_raw_data_monthly_saves, $file_raw_data_daily_saves) ;
- &ProcessRawData ;
+# &ProcessRawData ;

+ print "\n\nReady\n\n" ;
+
exit ;

sub CollectRawData
@@ -60,7 +65,7 @@

while ($true)
{
- $dir = "$path_root/" . sprintf ("%04d-%02d", $year, $month) ;
+ $dir = "$path_csv/" . sprintf ("%04d-%02d", $year, $month) ;
$yyyymm = sprintf ("%04d-%02d", $year, $month) ;
if (-d $dir)
{
@@ -243,6 +248,7 @@
}
}

+# not operational, obsolete? Q&D code?
sub ProcessRawData
{
print "\nProcessRawData\n\n" ;
@@ -457,7 +463,6 @@
{ $ratio = sprintf ("%5.1f", $count_edits / $count_submits) ; }
$text .= sprintf ("%-14s",'total') . "edits " . sprintf ("%6d", $count_edits) . ", submits ". sprintf ("%6d", $count_submits) . ", ratio $ratio\n" ;
$text .= "\n\n" ;
- print $count

$text .= "Count per relevant status with redlink:\n" ;
foreach $key (sort keys %counts_per_relevant_status_with_redlink)
@@ -469,7 +474,7 @@
}
$text .= "\n\n" ;

- open SUMMARY, '>', $file_txt_summary ;
+ open SUMMARY, '>', "$path_log/$file_log" ;
print SUMMARY $text ;
close SUMMARY ;

@@ -489,3 +494,10 @@
my $days = ($timegm2-$timegm1) / (24*60*60) ;
return ($days) ;
}
+
+sub Log
+{
+ my $msg = shift ;
+ print $msg ;
+}
+

Modified: trunk/wikistats/squids/SquidCountryScanConfig.pm
===================================================================
--- trunk/wikistats/squids/SquidCountryScanConfig.pm 2012-04-30 12:39:01 UTC (rev 115088)
+++ trunk/wikistats/squids/SquidCountryScanConfig.pm 2012-04-30 16:52:33 UTC (rev 115089)
@@ -1,7 +1,11 @@
#!/usr/bin/perl

- $cfg_liblocation = "/home/ezachte/lib" ;
+ $cfg_liblocation = "/a/squid/stats/scripts" ;

- $cfg_path_root_production = "/a/ezachte/" ;
- $cfg_path_root_test = "w:/! perl/squids/archive/" ; # Erik
-# $cfg_path_root_test = "?" ; # André
+ $cfg_path_csv = "/a/squid/stats/csv/" ;
+ $cfg_path_csv_test = "w:/! perl/squids/archive/" ; # Erik
+# $cfg_path_csv_test = "?" ; # André
+
+ $cfg_path_log = "/a/squid/stats/scripts/" ;
+ $cfg_path_log_test = "w:/! perl/squids/archive/" ; # Erik
+# $cfg_path_log_test = "?" ; # André

Modified: trunk/wikistats/squids/SquidLoadScan.pl
===================================================================
--- trunk/wikistats/squids/SquidLoadScan.pl 2012-04-30 12:39:01 UTC (rev 115088)
+++ trunk/wikistats/squids/SquidLoadScan.pl 2012-04-30 16:52:33 UTC (rev 115089)
@@ -104,6 +104,7 @@
{
$avg_delta_all_regular_squids = sprintf ("%.0f", $all_regular_squids_delta_hour {$date_hour} / $all_regular_squids_active {$date_hour}) ;
print CSV "$date_hour,$avg_delta_all_regular_squids\n" ;
+ print "$date_hour,$avg_delta_all_regular_squids\n" ;
}
close CSV ;
}

Modified: trunk/wikistats/squids/SquidReportArchive.pl
===================================================================
--- trunk/wikistats/squids/SquidReportArchive.pl 2012-04-30 12:39:01 UTC (rev 115088)
+++ trunk/wikistats/squids/SquidReportArchive.pl 2012-04-30 16:52:33 UTC (rev 115089)
@@ -1,5 +1,7 @@
#!/usr/bin/perl

+ $| = 1; # Flush output
+
use SquidReportArchiveConfig ;
use lib $cfg_liblocation ;

@@ -32,18 +34,20 @@

undef %country_code_not_specified_reported ;

- $path_in = $job_runs_on_production_server ? $cfg_path_in_production : $cfg_path_in_test ;
- $path_out = $job_runs_on_production_server ? $cfg_path_out_production : $cfg_path_out_test ;
+ $path_csv = $job_runs_on_production_server ? $cfg_path_csv : $cfg_path_csv_test ;
+ $path_reports = $job_runs_on_production_server ? $cfg_path_reports : $cfg_path_reports_test ;
+ $path_log = $job_runs_on_production_server ? $cfg_path_log : $cfg_path_log_test ;

- &Log ("Path in = $path_in\n") ;
- &Log ("Path out = $path_out\n") ;
+ &Log ("Path csv = $path_csv\n") ;
+ &Log ("Path reports = $path_reports\n") ;
+ &Log ("Path log = $path_log\n") ;

# following test needs to change -> remove server name dependency (new run argument ?)
# elsif ($hostname eq 'bayes')
# {
# &Log ("\n\nJob runs on server $hostname\n\n") ;
-# $path_in = "/home/ezachte/wikistats/animation" ;
-# $path_out = "/home/ezachte/wikistats/animation" ;
+# $path_csv = "/home/ezachte/wikistats/animation" ;
+# $path_reports = "/home/ezachte/wikistats/animation" ;
# }

$file_csv_country_meta_info = "SquidReportCountryMetaInfo.csv" ;
@@ -52,28 +56,66 @@
# 'http://en.wikipedia.org/wiki/List_of_countries_by_population'
# 'http://en.wikipedia.org/wiki/List_of_countries_by_number_of_Internet_users'
if (defined ($options {"w"}))
- { &ReadWikipedia ; &Log ("Ready\n") ; exit ; }
+ {
+ &ReadWikipedia ;
+ &Log ("\n\nReady\n\n") ;
+ exit ;
+ }
+ elsif (defined ($options {"c"}))
+ {
+ $reportcountries = $true ;
+ &Log ("\nGenerate report per country\n\n") ;

- if (defined ($options {"c"}))
- { $reportcountries = $true ; }
+ if (defined ($options {"q"}))
+ {
+ $quarter_only = $options {"q"} ; # process for this quarter only
+ if ($quarter_only !~ /^2\d\d\dQ\d$/)
+ { abort ("Specify run for one single quarter as -q yyyyQ[1-4], e.g. -q 2011Q3, not '$quarter_only'\n") ; }
+ $quarter_only =~ s/^(\d\d\d\d)(Q\d)$/$1 $2/ ;
+ &Log ("\nRun for one quarter only: $quarter_only\n\n") ;
+ }
+ }
+ elsif (defined ($options {"m"}) || defined ($options {"d"}))
+ {
+ if (($options {"m"} !~ /^\d\d\d\d-\d\d$/) && ($options {"d"} !~ /^-\d+$/))
+ { &Log ("Specify month as -m yyyy-mm or days back as -d -[days] (e.g. -d -1 for yesterday)") ; exit ; }

- if (defined ($options {"q"}))
- {
- $quarter_only = $options {"q"} ; # process for this quarter only
- if ($quarter_only !~ /^2\d\d\dQ\d$/)
- { abort ("Specify run for one single quarter as -q yyyyQ[1-4], e.g. -q 2011Q3, not '$quarter_only'\n") ; }
- $quarter_only =~ s/^(\d\d\d\d)(Q\d)$/$1 $2/ ;
- &Log ("QUARTER ONLY $quarter_only\n") ;
+ $reportdaysback = $options {"d"} ;
+ $reportmonth = $options {"m"} ;
+
+ if ($reportdaysback =~ /^-\d+$/)
+ {
+ ($sec,$min,$hour,$day,$month,$year) = localtime (time+$reportdaysback*86400) ;
+ $reportmonth = sprintf ("%04d-%02d",$year+1900,$month+1) ;
+ }
+
+ &Log ("Report month = $reportmonth\n") ;
}
+ else { &Log ("No valid run option found. Specify -c [-q ..]| -m ..| -d ..| -w") ; exit ; }

+
# date range used to be read from csv file with ReadDate, now there are daily csv files
# if earlier methods still is useful it needs to be tweaked
# if (($reportmonth ne "") && ($reportmonth !~ /^\d{6}$/))

+ if ($quarter_only ne '')
+ { $path_reports = "$path_reports/$quarter_only" ; }
+ elsif ($reportmonth ne '')
+ { $path_reports = "$path_reports/$reportmonth" ; }
+ elsif ($reportcountries)
+ { $path_reports = "$path_reports/countries" ; }
+
+ print "Write report to $path_reports\n" ;
+
+ $path_reports =~ s/ /-/g ;
+ if (! -d $path_reports)
+ {
+ # print "mkdir $path_reports\n" ;
+ mkdir ($path_reports) || die "Unable to create directory $path_reports\n" ;
+ }
+
&InitProjectNames ;

- $file_csv_country_codes = "CountryCodes.csv" ;
-
&ReadInputCountriesNames ;

if ($reportcountries)
@@ -90,25 +132,12 @@
exit ;
}

- $reportdaysback = $options {"d"} ;
- $reportmonth = $options {"m"} ;
-
- if (($reportmonth !~ /^\d\d\d\d-\d\d$/) && ($reportdaysback !~ /^-\d+$/))
- { &Log ("Specify month as -m yyyy-mm or days back as -d -[days] (e.g. -d -1 for yesterday)") ; exit ; }
-
- if ($reportdaysback =~ /^-\d+$/)
- {
- ($sec,$min,$hour,$day,$month,$year) = localtime (time+$reportdaysback*86400) ;
- $reportmonth = sprintf ("%04d-%02d",$year+1900,$month+1) ;
- }
- &Log ("Report month = $reportmonth\n") ;
-
$days_in_month = &DaysInMonth (substr ($reportmonth,0,4), substr ($reportmonth,5,2)) ;

$threshold_mime = 0 ;
$threshold_project = 10 ;

- $file_log = "WikiReportsSampledVisitorsLog.log" ;
+ $file_log = "SquidReportArchive.log" ;

$file_html_crawlers = "SquidReportCrawlers.htm" ;
$file_html_methods = "SquidReportMethods.htm" ;
@@ -124,6 +153,7 @@
$file_html_clients_html = "SquidReportClientsHtmlOnly.htm" ;
$file_html_countries_info = "SquidReportCountryData.htm" ;

+ $file_csv_user_agents = "SquidReportUserAgents.csv" ;
# names till 2010-07-01
#
# $file_csv_crawlers = "SquidDataCrawlers.csv" ;
@@ -161,8 +191,8 @@

print "\n\nJob SquidReportArchive.pl\n\n" ;

- if (! -d "$path_in/$reportmonth")
- { print "Directory not found: $path_in\/$reportmonth\n" ; exit ; }
+ if (! -d "$path_csv/$reportmonth")
+ { print "Directory not found: $path_csv\/$reportmonth\n" ; exit ; }

# for ($month = 4 ; $month <= 10 ; $month ++)
# {
@@ -173,7 +203,7 @@
# last if ($month == 10) && ($day > 24) # temp code stay with DST summer time zone for SV

$date = $reportmonth . "-". sprintf ("%02d", $day) ;
- $dir = "$path_in/$reportmonth/$date" ;
+ $dir = "$path_csv/$reportmonth/$date" ;

if (-d $dir)
{
@@ -195,9 +225,6 @@
if ($#dirs_process < 0)
{ print "No valid data to process.\n" ; exit ; }

- $path_reports = "$path_in/$reportmonth" ;
- print "Write report to $path_reports\n" ;
-
$google_ip_ranges = "<b>IP ranges:</b> known ip ranges for Google are 64.233.[160.0-191.255], 66.249.[64.0-95.255], 66.102.[0.0-15.255], 72.14.[192.0-255.255], <br>74.125.[0.0-255.255], " .
"209.085.[128.0-255.255], 216.239.[32.0-63.255] and a few minor other subranges</small><p>\n" ;

@@ -211,6 +238,7 @@
{
$days_input_found ++ ;

+ print "\nRead input from $path_process\n" ;
&ReadInputClients ;
&ReadInputCrawlers ;
&ReadInputMethods ;
@@ -347,9 +375,9 @@
$file_csv_per_country_overview = "SquidReport${selection}PerCountryOverview.csv" ;
$file_csv_per_country_density = "SquidReport${selection}PerCountryDensity.csv" ;

- $path_csv_squid_counts_monthly = "$path_in/$file_csv_squid_counts_monthly" ;
+ $path_csv_squid_counts_monthly = "$path_csv/$file_csv_squid_counts_monthly" ;
if (! -e $path_csv_squid_counts_monthly) { abort ("Input file $path_csv_squid_counts_monthly not found!") ; }
- $path_csv_squid_counts_daily = "$path_in/$file_csv_squid_counts_daily" ;
+ $path_csv_squid_counts_daily = "$path_csv/$file_csv_squid_counts_daily" ;
if (! -e $path_csv_squid_counts_daily) { abort ("Input file $path_csv_squid_counts_daily not found!") ; }

&ReadInputCountriesMonthly ($project_mode) ;
@@ -567,7 +595,7 @@
{
&Log ("ReadCountryCodes\n") ;

- open CODES, '<', "$path_in/$file_csv_country_codes" ;
+ open CODES, '<', "$path_csv/$file_csv_country_codes" ;
while ($line = <CODES>)
{
if ($line =~ /^[A-Z]/)
@@ -1364,7 +1392,8 @@
{
&Log ("ReadInputCountriesNames\n") ;

- $path_csv_country_codes = "$path_in/$file_csv_country_codes" ;
+ $file_csv_country_codes = "CountryCodes.csv" ;
+ $path_csv_country_codes = "$path_csv/$file_csv_country_codes" ;
if (! -e $path_csv_country_codes) { abort ("Input file $path_csv_country_codes not found!") ; }

open CSV_COUNTRY_CODES, '<', $path_csv_country_codes ;
@@ -1409,13 +1438,12 @@

# http://en.wikipedia.org/wiki/List_of_countries_by_population
# http://en.wikipedia.org/wiki/List_of_countries_by_number_of_Internet_users
- &Log ("Read $path_in/$file_csv_country_meta_info\n") ;
- open COUNTRY_META_INFO, '<', "$path_in/$file_csv_country_meta_info" ;
+ &Log ("Read $path_csv/$file_csv_country_meta_info\n") ;
+ open COUNTRY_META_INFO, '<', "$path_csv/$file_csv_country_meta_info" ;
binmode COUNTRY_META_INFO ;
while ($line = <COUNTRY_META_INFO>)
{
chomp $line ;
-
$line =~ s/[\x00-\x1f]//g ;

($country,$link,$population,$connected,$icon) = split ',', $line ;
@@ -1447,6 +1475,7 @@

if ($connected eq 'connected')
{ &Log ("connected unknown: $country\n") ; }
+
$connected =~ s/connected/../g ;
$country_meta_info {$country} = "$link,$population,$connected,$icon" ;

@@ -1466,6 +1495,7 @@
{
$country_name = $country_names {$country_code} ;
$country_meta = $country_meta_info {$country_name} ;
+
my ($link,$population,$connected,$icon) = split (',', $country_meta) ;

$region_code = $region_codes {$country_code} ;
@@ -1948,6 +1978,22 @@
my $file_csv = "$path_process/$file_csv_countries_info" ;
if (! -e $file_csv)
{ abort ("Function ReadInputCountryInfo: file $file_csv not found!!!") ; }
+ #$allcountrytotal = 0 ;
+ #$countrytotal = { } ;
+ #$allcountrybrowser = { } ;
+ #$countrybrowser = { } ;
+ #$allcountryos = { } ;
+ #$countryos = { } ;
+ #$allcountrymobile = 0 ;
+ #$countrymobile = { } ;
+ undef $allcountrytotal ;
+ undef %countrytotal ;
+ undef %allcountrybrowser ;
+ undef %countrybrowser ;
+ undef %allcountryos ;
+ undef %countryos ;
+ undef $allcountrymobile ;
+ undef %countrymobile ;
open CSV_COUNTRIES_INFO, '<', $file_csv ;
while ($line = <CSV_COUNTRIES_INFO>)
{
@@ -4848,7 +4894,10 @@
$altbgcolor = '#DDFFDD' ;

open FILE_HTML_USER_AGENTS, '>', "$path_reports/$file_html_user_agents" ;
+ open FILE_CSV_USER_AGENTS, '>', "$path_reports/$file_csv_user_agents" ;

+ $csv_out = "# user agents lay-out\n" ;
+ $csv_out = "# pageviews total, pageviews mobile, pageviews main, opensearch, all total, all mobile, all main, all other\n" ;
$html = $header ;
$html =~ s/TITLE/Wikimedia Traffic Analysis Report - User Agent Overview/ ;
$html =~ s/HEADER/Wikimedia Traffic Analysis Report - User Agent Overview/ ;
@@ -4857,7 +4906,7 @@
$html =~ s/X1000/&rArr; <font color=#008000><b>all counts x 1000<\/b><\/font>.<br>/ ;

$html .= "<table border=1>\n" ;
-
+
$html .= "<tr><th class=l valign='top' rowspan=2 colspan=4>&nbsp;</th><th rowspan=16>&nbsp;</th><th class=c colspan=5>Page views</th><th rowspan=16>&nbsp;</th><th class=c colspan=5>All requests</th><th rowspan=16>&nbsp;</th></tr>\n" ;
$html .= "<tr><th class=c>Total</th><th class=c>Percentage</th><th class=c>To mobile</th><th class=c>To main site</th><th class=c>Search-based estimate<a href='#explain_search'>[1]</a></th>" ;
$html .= "<th class=c>Total</th><th class=c>Percentage</th><th class=c>To mobile</th><th class=c>To main site</th><th class=c>To other servers<a href='#explain_other'>[2]</a></th></tr>\n" ;
@@ -4915,7 +4964,7 @@
$html =~ s/TITLE/Wikimedia Traffic Analysis Report - Data per Country/ ;
$html =~ s/HEADER/Wikimedia Traffic Analysis Report - Data per Country/ ;
$html =~ s/ALSO/&nbsp;See also: <b>LINKS<\/b>/ ;
- $html =~ s/LINKS/$link_requests $link_origins \/ $link_methods \/ $link_scripts \/ $dummy_user_agents \/ $link_skins \/ $link_crawlers \/ $link_opsys \/ $link_browsers \/ $link_google/ ;
+ $html =~ s/LINKS/$link_requests $link_origins \/ $link_methods \/ $link_scripts \/ $link_user_agents \/ $link_skins \/ $link_crawlers \/ $link_opsys \/ $link_browsers \/ $link_google/ ;
$html =~ s/X1000/&rArr; <font color=#008000><b>all counts x 1000<\/b><\/font>.<br>/ ;

$html .= "<table border=1 width=800>\n" ;
@@ -5175,10 +5224,10 @@

sub WriteCsvCountriesTimed
{
- &Log ("WriteCsvCountriesTimed: $path_out/$file_csv_countries_timed\n") ;
+ &Log ("WriteCsvCountriesTimed: $path_csv/$file_csv_countries_timed\n") ;

$multiplier_1000 = 1000 * $multiplier ;
- open CSV_COUNTRIES_TIMED, '>', "$path_out/$file_csv_countries_timed" ;
+ open CSV_COUNTRIES_TIMED, '>', "$path_csv/$file_csv_countries_timed" ;

foreach $target (sort keys %targets)
{
@@ -5230,9 +5279,9 @@
# http://www.maxmind.com/app/iso3166 country codes
sub WriteCsvCountriesGoTo
{
- &Log ("WriteCsvCountriesGoTo: $path_out/$file_csv_countries_languages_visited\n") ;
+ &Log ("WriteCsvCountriesGoTo: $path_csv/$file_csv_countries_languages_visited\n") ;

- open CSV_COUNTRIES_LANGUAGES_VISITED, '>', "$path_out/$file_csv_countries_languages_visited" ;
+ open CSV_COUNTRIES_LANGUAGES_VISITED, '>', "$path_csv/$file_csv_countries_languages_visited" ;

foreach $country (sort keys %countries)
{
@@ -5398,7 +5447,7 @@
$index = &HtmlIndex (join '/ ', sort (@index_languages)) ;
$html =~ s/INDEX/$index/ ;

- &PrintHtml ($html, "$path_out/$file_html_per_language_breakdown") ;
+ &PrintHtml ($html, "$path_reports/$file_html_per_language_breakdown") ;
}

sub WriteReportPerCountryOverview
@@ -5689,7 +5738,7 @@
$html =~ s/TOTAL/$html_total/ ;
$html =~ s/REGIONS/$html_regions/ ;

- &PrintHtml ($html, "$path_out/$file_html_per_country_overview") ;
+ &PrintHtml ($html, "$path_reports/$file_html_per_country_overview") ;
}

#sub WriteReportPerCountryOverviewLine
@@ -5845,7 +5894,7 @@

# $file_csv_per_country_overview2 = $file_csv_per_country_overview ;
# $file_csv_per_country_overview2 =~ s/\.csv/-$postfix.csv/ ;
- &PrintCsv ($header_csv_countries . join ('', sort @csv_countries), "$path_out/$file_csv_per_country_density") ;
+ &PrintCsv ($header_csv_countries . join ('', sort @csv_countries), "$path_csv/$file_csv_per_country_density") ;
}

sub WriteCsvSvgFilePerCountryOverview
@@ -6022,7 +6071,7 @@

$file_csv_per_country_overview2 = $file_csv_per_country_overview ;
$file_csv_per_country_overview2 =~ s/\.csv/-$postfix.csv/ ;
- &PrintCsv ($header_csv_countries . join ('', sort @csv_countries), "$path_out/svg/$file_csv_per_country_overview2") ;
+ &PrintCsv ($header_csv_countries . join ('', sort @csv_countries), "$path_csv/svg/$file_csv_per_country_overview2") ;

# $perc_tot = 0 ;
# foreach $code (keys_sorted_by_value_num_desc %requests_per_connected_persons)
@@ -6386,9 +6435,9 @@
$html =~ s/INDEX/$index/ ;

if (! $show_logcount)
- { &PrintHtml ($html, "$path_out/$file_html_per_country_breakdown") ; }
+ { &PrintHtml ($html, "$path_reports/$file_html_per_country_breakdown") ; }
else
- { &PrintHtml ($html, "$path_out/$file_html_per_country_breakdown_huge") ; }
+ { &PrintHtml ($html, "$path_reports/$file_html_per_country_breakdown_huge") ; }
}

sub WriteReportPerCountryTrends
@@ -6527,7 +6576,7 @@
$index = &HtmlIndex (join '/ ', sort (@index_countries)) ;
$html =~ s/INDEX/$index/ ;

- &PrintHtml ($html, "$path_out/$file_html_per_country_trends") ;
+ &PrintHtml ($html, "$path_reports/$file_html_per_country_trends") ;
}

sub CorrectForMissingDays
@@ -6681,7 +6730,7 @@
# close "FILE_LOG" ;
# }
# open "FILE_LOG", ">>", "$path_reports/$file_log" || abort ("Log file '$file_log' could not be opened.") ;
- open "FILE_LOG", ">>", "$path_reports/$file_log" || abort ("Log file '$file_log' could not be opened.") ;
+ open "FILE_LOG", ">>", "$path_log/$file_log" || abort ("Log file '$file_log' could not be opened.") ;
&Log ("\n\n===== Wikimedia Sampled Visitors Log Report / " . date_time_english (time) . " =====\n\n") ;
}

@@ -7200,8 +7249,8 @@
{ &Log ("$country\n") ; }
}

- &Log ("Write $path_in/$file_csv_country_meta_info\n\n") ; # use $path_in, not $path_out so that next step picks up proper file
- open COUNTRY_META_INFO, '>', "$path_in/$file_csv_country_meta_info" ;
+ &Log ("Write $path_csv/$file_csv_country_meta_info\n\n") ;
+ open COUNTRY_META_INFO, '>', "$path_csv/$file_csv_country_meta_info" ;
foreach $country (sort keys %countries)
{ print COUNTRY_META_INFO $countries {$country} ; }
close COUNTRY_META_INFO ;

Modified: trunk/wikistats/squids/SquidReportArchive.sh
===================================================================
--- trunk/wikistats/squids/SquidReportArchive.sh 2012-04-30 12:39:01 UTC (rev 115088)
+++ trunk/wikistats/squids/SquidReportArchive.sh 2012-04-30 16:52:33 UTC (rev 115089)
@@ -1,24 +1,33 @@
#! /bin/sh
ulimit -v 4000000

-home="/a/ezachte"
-month=2011-08
+month=2012-01
+quarter=2012Q1

-# perl $home/SquidReportArchive.pl -m 201007 > SquidReportArchiveLog.txt
+stats=/a/squid/stats
+scripts=$stats/scripts
+cd $scripts
+
+# perl $scripts/SquidReportArchive.pl -m 201007 > SquidReportArchiveLog.txt
# after further automating SquidScanCountries.sh

-# perl $home/SquidCountryScan.pl # start in July 2009
-# perl $home/SquidReportArchive.pl -c # >> SquidReportArchiveLog.txt # -c for per country reports
-# perl $home/SquidReportArchive.pl -c -q 2010Q2 # >> SquidReportArchiveLog.txt # -c for per country reports
-perl $home/SquidReportArchive.pl -m $month # >> SquidReportArchiveLog.txt
+# perl SquidCountryScan.pl # collect csv data for all months, start in July 2009
+# perl SquidReportArchive.pl -c # >> SquidReportArchiveLog.txt # -c for per country reports
+# perl SquidReportArchive.pl -c -q $quarter # >> SquidReportArchiveLog.txt # -c for per country reports
+# perl SquidReportArchive.pl -m $month # >> SquidReportArchiveLog.txt
+perl SquidReportArchive.pl -m 2011-10 # >> SquidReportArchiveLog.txt
+perl SquidReportArchive.pl -m 2011-11 # >> SquidReportArchiveLog.txt
+perl SquidReportArchive.pl -m 2011-12 # >> SquidReportArchiveLog.txt
+perl SquidReportArchive.pl -m 2012-01 # >> SquidReportArchiveLog.txt
+perl SquidReportArchive.pl -m 2012-02 # >> SquidReportArchiveLog.txt
+perl SquidReportArchive.pl -m 2012-03 # >> SquidReportArchiveLog.txt
+

-ls -l /a/ezachte/reports*$month*
-rm /a/ezachte/reports*$month*
-
-tar -cf /a/ezachte/$month/$month-html.tar /a/ezachte/$month/*.htm
-cp /a/ezachte/$month/$month-html.tar ./reports-traffic-$month.tar
+exit
+tar -cf $stats/$month/$month-html.tar $reports/$month/*.htm
+cp $reports/$month/$month-html.tar ./reports-traffic-$month.tar
tar -cf reports-countries-$month.tar SquidReportPage*.htm
bzip2 -f reports-traffic-$month.tar
bzip2 -f reports-countries-$month.tar
tar -cf reports-$month.tar reports-*-$month.tar.bz2
-rm /a/ezachte/reports*$month*.bz2
+rm $reports/reports*$month*.bz2

Modified: trunk/wikistats/squids/SquidReportArchiveConfig.pm
===================================================================
--- trunk/wikistats/squids/SquidReportArchiveConfig.pm 2012-04-30 12:39:01 UTC (rev 115088)
+++ trunk/wikistats/squids/SquidReportArchiveConfig.pm 2012-04-30 16:52:33 UTC (rev 115089)
@@ -1,16 +1,20 @@
#!/usr/bin/perl

- $cfg_liblocation = "/home/ezachte/lib" ;
+ $cfg_liblocation = "/a/squid/stats/scripts" ;

- $cfg_path_in_production = "/a/ezachte" ;
- $cfg_path_out_production = "/a/ezachte" ;
-# $cfg_path_in_test = "W:/# Out Locke" ; # Erik
-# $cfg_path_out_test = "W:/# Out Test/Locke" ; # Erik
- $cfg_path_in_test = "/srv/erik/" ; # André
- $cfg_path_out_test = "/srv/erik/" ; # André
+ $cfg_path_csv = "/a/squid/stats/csv" ;
+ $cfg_path_reports = "/a/squid/stats/reports" ;
+ $cfg_path_log = "/a/squid/stats/scripts" ;
+
+ $cfg_path_csv_test = "W:/# Out Locke" ; # Erik
+ $cfg_path_reports_test = "W:/# Out Test/Locke" ; # Erik
+ $cfg_path_log_test = "W:/# Out Test/Locke" ; # Erik
+# $cfg_path_csv_test = "/srv/erik/" ; # André
+# $cfg_path_reports_test = "/srv/erik/" ; # André
+ $cfg_path_log_test = "/srv/erik/" ; # André

# set default arguments for test on local machine
-# $cfg_default_argv = "-m 2011-07" ; # monthly report
+ $cfg_default_argv = "-m 2011-08" ; # monthly report
# $cfg_default_argv = "-w" ; # refresh country info from Wikipedia (population etc)
# $cfg_default_argv = "-c" ; # country/regional reports
- $cfg_default_argv = "-c -q 2011Q4" ; # country/regional reports based on data for one quarter only
+# $cfg_default_argv = "-c -q 2011Q4" ; # country/regional reports based on data for one quarter only

Added: trunk/wikistats/squids/SquidTraceUniqueImages.pl
===================================================================
--- trunk/wikistats/squids/SquidTraceUniqueImages.pl (rev 0)
+++ trunk/wikistats/squids/SquidTraceUniqueImages.pl 2012-04-30 16:52:33 UTC (rev 115089)
@@ -0,0 +1,83 @@
+#!/usr/bin/perl
+
+ $max_days_ago = 100 ;
+ $time_start = time ;
+
+ open CSV_OUT, '>', '/a/ezachte/SquidDataTrendUniqueImages.csv' ;
+ print CSV_OUT ",unique files,,,,unique images\n" ;
+ print CSV_OUT "date,count,delta,,date,count,delta\n" ;
+
+ for ($days_ago = $max_days_ago ; $days_ago > 0 ; $days_ago --)
+ {
+ ($day,$month,$year) = (localtime ($time_start - 3600 * 24 * $days_ago))[3,4,5];
+ $month++ ;
+ $year+=1900 ;
+ $yyyy_mm = sprintf ("%04d-%02d", $year, $month) ;
+ $yyyy_mm_dd = sprintf ("%04d-%02d-%02d", $year, $month, $day) ;
+ $date_excel = "\"=DATE($year,$month,$day)\"" ;
+
+ # print "$days_ago days ago -> $yyyy_mm_dd\n" ;
+
+ $file = "/a/ezachte/$yyyy_mm/$yyyy_mm_dd/public/SquidDataBinaries.csv" ;
+
+ if (! -e $file)
+ { print "No file $file\n" ; next }
+
+ print "Process $file\n" ;
+
+ open CSV_IN, '<', $file ;
+ while ($line = <CSV_IN>)
+ {
+ chomp $line ;
+
+ next if $line =~ /^#/ ;
+ next if $line =~ /^:/ ;
+
+ if ($line =~ /,.*,/) # forgot to encode comma's in image name
+ {
+ $line =~ s/,([^,]*)$/#?#?#$1/;
+ $line =~ s/,/%47/g;
+ $line =~ s/\#\?\#\?\#/,/;
+ }
+
+ if ($line =~ /,.*,/) # not fixed ?
+ {
+ $line =~ s/,([^,]*)$/^#^$1/;
+ $line =~ s/,/%47/g;
+ print "\nSkip $line\n" ;
+ next ;
+ }
+
+ ($file,$count) = split (',', $line) ;
+
+ if ($files {$file} == 0)
+ {
+ $unique_files++ ;
+ $files {$file} += $count ;
+ }
+
+ # print "1 $file\n" ;
+ $file =~ s/^.*\/\d\/\d\w\/// ;
+ # print "2 $file\n" ;
+ $file =~ s/\/.*$// ;
+ # print "3 $file\n" ;
+
+ if ($images {$file} == 0)
+ {
+ $unique_images++ ;
+ $images {$file} += $count ;
+ }
+
+ }
+
+ $delta_files = $unique_files - $unique_files_prev ;
+ $delta_images = $unique_images - $unique_images_prev ;
+
+ print "$days_ago,$date_excel,$unique_files,$unique_images\n" ;
+ print CSV_OUT "$date_excel,$unique_files,$delta_files,,$date_excel,$unique_images,$delta_images\n" ;
+
+ $unique_files_prev = $unique_files ;
+ $unique_images_prev = $unique_images ;
+ }
+
+

Added: trunk/wikistats/squids/VizCollectEvents.pl
===================================================================
--- trunk/wikistats/squids/VizCollectEvents.pl (rev 0)
+++ trunk/wikistats/squids/VizCollectEvents.pl 2012-04-30 16:52:33 UTC (rev 115089)
@@ -0,0 +1,223 @@
+#!/usr/bin/perl
+
+ use Time::Local ;
+ use Compress::Zlib;
+ use Getopt::Std ;
+
+ my $options ;
+ getopt ("d", \%options) ;
+ $date = $options {"d"} ;
+
+ die "Specify date as yyyy/mm/dd" if $date !~ /^\d\d\d\d\/\d\d\/\d\d$/ ;
+ ($year,$month,$day) = split ('\/', $date) ;
+
+ $date1 = sprintf ("%04d%02d%02d", $year, $month, $day) ;
+ $time = timegm (0,0,0,$day,$month-1,$year-1900) ;
+ ($sec,$min,$hour,$day2,$month2,$year2) = gmtime ($time+24*3600) ;
+ $date2 = sprintf ("%04d%02d%02d", $year2+1900, $month2+1, $day2) ;
+
+ if (-d "/a/ezachte")
+ {
+ $dir_in = "/a/squid/archive" ;
+ $dir_out = "/a/ezachte" ;
+ }
+ else
+ {
+ print "Test on Windows\n" ;
+ use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; # install IO-Compress-Zlib
+ use IO::Compress::Gzip qw(gzip $GzipError) ; # install IO-Compress-Zlib
+
+ $dir_in = "." ;
+ $dir_out = "." ;
+ }
+
+ $dir_out .= "/" . sprintf ("%04d-%02d", $year, $month) ;
+
+ if (! -d $dir_out)
+ {
+ print "mkdir $dir_out\n" ;
+ mkdir ($dir_out) || die "Unable to create directory $dir_out\n" ;
+ }
+
+ $dir_out .= "/" . sprintf ("%04d-%02d-%02d", $year, $month, $day) ;
+ if (! -d $dir_out)
+ {
+ print "mkdir $dir_out\n" ;
+ mkdir ($dir_out) || die "Unable to create directory $dir_out\n" ;
+ print "mkdir $dir_out/private\n" ;
+ mkdir ("$dir_out/private") || die "Unable to create directory $dir_out/private\n" ;
+ print "mkdir $dir_out/public\n" ;
+ mkdir ("$dir_out/public" ) || die "Unable to create directory $dir_out/public\n" ;
+ }
+
+ &CollectEdits ($date1,$date2) ;
+# &CollectViews ($date1,$date2) ;
+
+ print "\n\nReady\n\n" ;
+ exit ;
+
+sub CollectEdits
+{
+ my ($date1,$date2) = @_ ;
+
+ $file_date1 = "$dir_in/edits.log-$date1.gz" ;
+ $file_date2 = "$dir_in/edits.log-$date2.gz" ;
+ $file_out = "$dir_out/private/SquidDataEditsVizDoNotPublish-$date1.gz" ;
+
+ die ("File not found: $file_date1\n") if (! -e $file_date1) ;
+ die ("File not found: $file_date2\n") if (! -e $file_date2) ;
+
+# open OUT, '>', "$dir_out/edits-$date1.txt" ;
+ $gz_out = gzopen ($file_out, "wb") || die "Unable to write $file_out $!\n" ;
+ &FilterRequests ($file_date1,$date1,$date2) ;
+ &FilterRequests ($file_date2,$date1,$date2) ;
+ $gz_out->gzclose();
+# close OUT ;
+}
+
+sub CollectViews
+{
+ my ($date1,$date2) = @_ ;
+
+ $file_date1 = "$dir_in/sampled-1000.log-$date1.gz" ;
+ $file_date1 = "$dir_in/sampled-1000.log-$date2.gz" ;
+ $file_out = "$dir_out/viz-edits-$date1.gz" ;
+
+ die ("File not found: $file_date1\n") if (! -e $file_date1) ;
+ die ("File not found: $file_date2\n") if (! -e $file_date2) ;
+
+# open OUT, '>', "$dir_out/edits-$date1.txt" ;
+ $gz_out = gzopen ($file_out, "wb") || die "Unable to write $file_out $!\n" ;
+ &FilterRequests ($file_date1,$date1,$date2) ;
+ &FilterRequests ($file_date2,$date1,$date2) ;
+ $gz_out->gzclose();
+# close OUT ;
+}
+
+sub FilterRequests
+{
+ my ($file,$date1,$date2) = @_ ;
+
+ $date1 = substr ($date1,0,4) . '-' . substr ($date1,4,2) . '-' . substr ($date1,6,2) ;
+ $date2 = substr ($date2,0,4) . '-' . substr ($date2,4,2) . '-' . substr ($date2,6,2) ;
+
+ print "\n\nFilterRequests $file $date1 $date2\n\n" ;
+
+ # open IN,"-|", "gzip -dc $file" ;
+ $gz_in = gzopen ($file, "r") || die "Unable to read $file $!\n" ;
+# open IN,"<", $file ;
+
+# while ($line = <IN>)
+ my $lines = 0 ;
+ my $lines2 = 0 ;
+ while ($gz_in->gzreadline ($line) > 0)
+ {
+ @fields = split ' ', $line ;
+ $time = $fields [2] ;
+ $ip = $fields [4] ;
+ $action = $fields [5] ;
+ $url = $fields [8] ;
+ $agent = lc ($fields [13]) ;
+
+ if ($lines++ % 10000 == 0)
+ { print "$time\n" ; }
+
+ last if $time =~ /^$date2/ ; # many lines for subsequent data on second file
+ next if $time !~ /^$date1/ ; # many lines for previous day on first file
+
+ if ($lines2++ == 0)
+ {
+ print "\n\nStart copying...\n\n" ;
+ print "$time\n" ;
+ }
+
+ next if $url !~ /action=submit/ ;
+ next if $action ne "TCP_MISS/302" ;
+
+ if (($agent =~ /bot/i) || ($agent =~ /https?:\/\//))
+ { $bot = 'B' ; }
+ else
+ { $bot = 'M' ; }
+
+ $url =~ s/^.*?\/\/// ;
+ ($domain,$location) = split ('\/',$url,2) ;
+ $domain = &Abbreviate ($domain) ;
+ if (($domain =~ /\./o) ||
+ ($domain !~ /^[\*\@\%]?!(wb|wn|wp|wq|ws|wv|wk|wx|xx|wm|mw|wmf)\:/o))
+ {
+ $unrecognized_domains {$domain_original} ++ ;
+ $domain = 'other' ;
+ }
+ $domain =~ s/!//o ; # not sure why this happens after Abbreviate, kept inline with SquidCountArchiveProcessLogRecord.pm
+
+ $time = substr ($time,0,19) ; # omit msec
+ $line = "$time,$ip,$domain,$bot\n" ;
+ $gz_out->gzwrite($line) || die "Zlib error writing to $gzfile: $gz_out->gzerror\n" ;
+ }
+
+ print "$time\n" ;
+
+ $gz_in->gzclose();
+}
+
+sub Abbreviate # copied from SquidCountArchiveProcessLogrecord, someday make it separate module
+{
+ my $domain = shift ;
+
+ $domain =~ s/www\.([^\.]+\.[^\.]+\.[^\.]+)/$1/o ;
+ $domain =~ s/\.com/\.org/o ;
+ $domain =~ s/^([^\.]+\.org)/www.$1/o ;
+
+ if ($domain !~ /\.org/o)
+ { $domain =~ s/www\.(wik[^\.\/]+)\.([^\.\/]+)/$2.$1.org/o ; }
+
+# $legend = "# wx = wikispecial (commons|mediawiki|meta|foundation|species)\n" ;
+# $legend .= "# xx:upload = upload.wikimedia.org\n" ;
+ $domain =~ s/commons\.wikimedia\.org/!wx:commons/o ;
+ $domain =~ s/www\.mediawiki\.org/!wx:mediawiki/o ;
+ $domain =~ s/meta\.wikipedia\.org/!wx:meta/o ;
+ $domain =~ s/meta\.wikimedia\.org/!wx:meta/o ;
+ $domain =~ s/foundation\.wikimedia\.org/!wx:foundation/o ;
+ $domain =~ s/species\.wikimedia\.org/!wx:species/o ;
+ $domain =~ s/upload\.wikimedia\.org/!xx:upload/o ;
+
+# $legend .= "# wmf = wikimediafoundation\n" ;
+# $legend .= "# wb = wikibooks\n" ;
+# $legend .= "# wn = wikinews\n" ;
+# $legend .= "# wp = wikipedia\n" ;
+# $legend .= "# wq = wikiquote\n" ;
+# $legend .= "# ws = wikisource\n" ;
+# $legend .= "# wv = wikiversity\n" ;
+# $legend .= "# wk = wiktionary\n" ;
+# $legend .= "# wm = wikimedia\n" ;
+# $legend .= "# mw = mediawiki\n" ;
+# $legend .= "# \@ = .mobile.\n" ;
+# $legend .= "# \* = .wap.\n" ;
+# $legend .= "# \% = .m.\n" ;
+
+ $domain =~ s/wikimediafoundation/!wmf/o ;
+ $domain =~ s/wikibooks/!wb/o ;
+ $domain =~ s/wikinews/!wn/o ;
+ $domain =~ s/wikipedia/!wp/o ;
+ $domain =~ s/wikiquote/!wq/o ;
+ $domain =~ s/wikisource/!ws/o ;
+ $domain =~ s/wikiversity/!wv/o ;
+ $domain =~ s/wiktionary/!wk/o ;
+ $domain =~ s/wikimedia/!wm/o ;
+ $domain =~ s/mediawiki/!mw/o ;
+
+ $domain =~ s/\.mobile\./.@/o ;
+ $domain =~ s/\.wap\./.*/o ;
+ $domain =~ s/\.m\./.%/o ;
+
+# if ($domain =~ /^error:/o)
+# { $domain_errors {$domain}++ ; }
+# $domain =~ s/error:.*$/!error:1/o ;
+
+ $domain =~ s/^([^\.\/]+)\.([^\.\/]+)\.org/$2:$1/o ;
+
+ $domain =~ s/\s//g ;
+
+ return ($domain) ;
+}
+

Added: trunk/wikistats/squids/VizCollectEventsMonth.pl
===================================================================
--- trunk/wikistats/squids/VizCollectEventsMonth.pl (rev 0)
+++ trunk/wikistats/squids/VizCollectEventsMonth.pl 2012-04-30 16:52:33 UTC (rev 115089)
@@ -0,0 +1,35 @@
+#!/usr/bin/perl
+
+# Run VizCollectEvents.pl once for every day of the month given as -m yyyy/mm.
+# Each command is echoed, followed by the output it produced (every line prefixed with '# ').
+
+  use lib "/home/ezachte/lib" ;
+  use EzLib ;
+  $trace_on_exit = $true ;
+  ez_lib_version (14) ;
+
+  use Time::Local ;
+  use Compress::Zlib;
+  use Getopt::Std ;
+
+  my %options ; # filled by getopt; -m takes a value
+  getopt ("m", \%options) ;
+  $month = $options {"m"} ;
+
+  # rejects a missing -m as well as impossible month numbers (00, 13, ..)
+  die "Specify month as -m yyyy/mm" if $month !~ /^\d\d\d\d\/(?:0[1-9]|1[0-2])$/ ;
+  ($year,$month) = split ('\/', $month) ;
+
+  $days = &days_in_month ($year,$month) ; # not defined in this script, presumably provided by EzLib
+  for ($day = 1 ; $day <= $days ; $day++)
+  {
+    $date = sprintf ("%04d/%02d/%02d",$year,$month,$day) ;
+    $cmd  = "perl VizCollectEvents.pl -d $date" ; # $date holds digits and slashes only, safe to pass to the shell
+    print "\n\n$cmd ->\n" ;
+
+    $result = `$cmd` ;
+
+    # backticks capture stdout only, so report a failing child explicitly
+    if ($? != 0)
+    { print "# Command failed, status $?\n" ; }
+
+    foreach $line (split ("\n", $result))
+    { print "# $line\n" ; }
+  }
+
+  print "\n\nReady\n\n" ;
+  exit ;
+
+

Added: trunk/wikistats/squids/VizPrepJs.pl
===================================================================
--- trunk/wikistats/squids/VizPrepJs.pl (rev 0)
+++ trunk/wikistats/squids/VizPrepJs.pl 2012-04-30 16:52:33 UTC (rev 115089)
@@ -0,0 +1,839 @@
+#!/usr/bin/perl
+
+# Collect one month of per-day edit and view files and write the result as JavaScript files.
+#
+# Options:
+#   -m yyyy/mm   month to process (default: the month before the current one, UTC)
+#   -r 2|4|8     round latitude/longitude to 1/r degree (default 8)
+#
+# http://tutorialajax.com/compress-javascript-with-gzip.html
+
+  use lib "/home/ezachte/lib" ;
+  use EzLib ;
+  $trace_on_exit = $true ;
+  ez_lib_version (14) ;
+
+  use Time::Local ;
+  use Geo::IP ;
+  use Compress::Zlib;
+  use Getopt::Std ;
+  # 'use' is executed at compile time, so this was always loaded, also when it sat in the test-only branch below
+  use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; # install IO-Compress-Zlib
+# use IO::Compress::Gzip qw(gzip $GzipError) ; # install IO-Compress-Zlib
+
+  my %options ; # filled by getopt; -m and -r both take a value
+  getopt ("mr", \%options) ;
+
+# if (defined $options {'d'})
+# {
+# die "Specify -d or -m , not both" if defined $options {'m'} ;
+# $date = $options {"d"} ;
+# die "Specify date as -d yyyy/mm/dd" if $date !~ /^\d\d\d\d\/\d\d\/\d\d$/ ;
+# ($year,$month,$day) = split ('\/', $date) ;
+# }
+
+  if (defined $options {'m'})
+  {
+    $month = $options {"m"} ;
+    die "Specify month as -m yyyy/mm" if $month !~ /^\d\d\d\d\/(?:0[1-9]|1[0-2])$/ ;
+    ($year,$month) = split ('\/', $month) ;
+    $day = 1 ; # no day on the command line; any valid day will do for $date1/$date2 below
+  }
+  else
+  {
+    # gmtime returns the month as 0..11, which read as 1..12 is already the previous month,
+    # except in January, where the previous month is December of last year
+    ($sec,$min,$hour,$day,$month,$year) = gmtime (time) ;
+    $year += 1900 ;
+    if ($month == 0)
+    { $month = 12 ; $year-- ; }
+  }
+
+  # needed in both branches: names the input folder and the output files
+  $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
+  print "Collect data for month: $yyyymm\n" ;
+
+  {
+    # today's day number may not exist in the previous month (e.g. the 31st),
+    # and timegm croaks on an out-of-range day
+    my $last_day_of_month = days_in_month ($year, $month) ;
+    $day = $last_day_of_month if $day > $last_day_of_month ;
+  }
+
+  @months = qw (January February March April May June July August September October November December) ;
+  $name_month = $months [$month-1] ;
+
+  # round latitude and longitude to 1/$resolution degrees
+  $resolution = 8 ;
+  if (defined $options {'r'})
+  {
+    $resolution = $options {"r"} ;
+    die "Specify resolution as -r [2|4|8]" if $resolution !~ /^(?:2|4|8)$/ ;
+  }
+
+  my $gi = Geo::IP -> open ("GeoLiteCity.dat", GEOIP_STANDARD) || die "Could not open GeoLiteCity.dat" ;
+
+  # $date1 = selected day, $date2 = the day after, both as yyyymmdd
+  $date1 = sprintf ("%04d%02d%02d", $year, $month, $day) ;
+  $time = timegm (0,0,0,$day,$month-1,$year-1900) ;
+  ($sec,$min,$hour,$day2,$month2,$year2) = gmtime ($time+24*3600) ;
+  $date2 = sprintf ("%04d%02d%02d", $year2+1900, $month2+1, $day2) ;
+
+  if (-d "/a/ezachte")
+  {
+    $dir_in = "/a/ezachte/$yyyymm" ;
+    $dir_out = "/a/ezachte" ;
+  }
+  else
+  {
+    print "Test on Windows\n" ;
+    $dir_in = "." ;
+    $dir_out = "." ;
+  }
+
+  ListLanguages () ;
+  ReadEdits () ;
+  ReadViews () ;
+
+# die "Too few edit files found: $days_processed_edits" if $days_processed_edits < 15 ;
+# die "Too few view files found: $days_processed_views" if $days_processed_views < 15 ;
+
+  WriteJs () ;
+
+  print "Days processed for edits:$days_processed_edits\n" ;
+  print "Days processed for views:$days_processed_views\n" ;
+
+  print "\n\nReady\n\n" ;
+  exit ;
+
+sub ReadEdits
+{
+  # Read the gzipped edits file of every day of $year/$month:
+  #   $dir_in/yyyy-mm-dd/private/SquidDataEditsVizDoNotPublish-yyyymmdd.gz
+  # Each line is split on commas into time, ip, domain and bot flag.
+  # Lines whose domain does not match /wp/, or whose ip is unknown to the GeoIP database ($gi), are skipped.
+  #
+  # Accumulated over all days: %locations, %edits_per_location, %edits_per_language,
+  #                            $lines_bot, $lines_manual, $days_processed_edits
+  # Kept for the last day with a file only: @data_edits_bots, %latlong_edits_manual, $date_edits
+  # Days without a file are reported and skipped. Takes no arguments, returns nothing.
+
+  print "\n\nReadEdits\n\n" ;
+
+  my $days = days_in_month ($year, $month) ; # loop invariant
+# for ($day = 1 ; $day <= 3 ; $day++)
+  for ($day = 1 ; $day <= $days ; $day++)
+  {
+    $yyyymmdd = sprintf ("%04d-%02d-%02d", $year, $month, $day) ;
+    ($yyyymmdd2 = $yyyymmdd) =~ s/-//g ;
+    $file = "$dir_in/$yyyymmdd/private/SquidDataEditsVizDoNotPublish-$yyyymmdd2.gz" ;
+    print "\nProcess $file\n\n" ;
+
+    if (! -e $file)
+    { print "File not found!\n" ; next ; }
+
+    $days_processed_edits ++ ;
+
+    # remember individual edits only for the last day for which a file was found;
+    # reset only now that the file is known to exist, so that a missing file late in the month
+    # does not discard the edits (and date) of the last day that was actually present
+    undef @data_edits_bots ;
+    undef %latlong_edits_manual ;
+    $date_edits = sprintf ("%04d/%02d/%02d", $year, $month, $day) ;
+
+    my $gz_in = IO::Uncompress::Gunzip -> new ($file) or die "gunzip failed: $GunzipError\n";
+    binmode $gz_in ;
+
+    while ($line = <$gz_in>)
+    {
+      # last if $lines_manual > 1000 ;
+
+      chomp $line ;
+      ($time,$ip,$domain,$bot) = split (',', $line) ;
+
+      next if $domain !~ /wp/ ;
+
+      $rec = $gi -> record_by_name ($ip) ;
+      next if ! defined $rec ;
+
+      # time of day is taken from positions 11-18 of the time field, read as hh:mm:ss
+      $time = substr ($time,11,8) ;
+      $hh = substr ($time,0,2) ;
+      $mm = substr ($time,3,2) ;
+      $ss = substr ($time,6,2) ;
+      $min = 60 * $hh + $mm ;
+
+      # language code = lower-cased domain with everything up to the first colon removed
+      $domain =~ s/^.*?:// ;
+      $lang = lc ($domain) ;
+
+      $country_code = $rec->country_code ;
+      $country_code3 = $rec->country_code3 ;
+      $country_name = $rec->country_name ;
+      $region = $rec->region ;
+      $region_name = $rec->region_name ;
+      $city = $rec->city ;
+      $postal_code = $rec->postal_code ;
+      $latitude = $rec->latitude ;
+      $longitude = $rec->longitude ;
+
+      $secs = (24*3600 + $hh * 3600 + $mm * 60 + $ss + int(rand(1200)-600)) % (24*3600) ; # drift +/- 10 minutes
+
+      # always use low resolution for individual edits, to protect against de-anonymization
+      $longitude2 = sprintf ("%.0f", $longitude * 2) + 180 * 2 ; # resolution 1/2 degree
+      $latitude2 = sprintf ("%.0f", $latitude * 2) + 90 * 2 ; # resolution 1/2 degree
+
+      # shifted by 180/90 degrees so that the rounded coordinates are never negative
+      $longitude = sprintf ("%.0f", $longitude * $resolution) + 180 * $resolution ; # resolution 1/$resolution degree
+      $latitude = sprintf ("%.0f", $latitude * $resolution) + 90 * $resolution ; # resolution 1/$resolution degree
+
+      $locations {"$longitude,$latitude"} ++ ;
+
+      # musings on performance:
+      # http://code.flickr.com/blog/2009/03/18/building-fast-client-side-searches/
+
+      if ($bot eq 'B')
+      { push @data_edits_bots, "$secs,$lang,$country_code,$longitude2,$latitude2;" ; $lines_bot++ ; }
+      else
+      { $latlong_edits_manual {"$longitude2,$latitude2,$country_code"} .= "$secs,$lang;" ; $lines_manual++ ; }
+
+      # progress indicator: show one parsed record per 10000 edits
+      $line = "$bot,$lines_bot,$lines_manual,$time,$secs,$min,$lang,$country_code,$country_code3,$country_name,$region,$region_name,$city,$postal_code,$longitude,$latitude\n" ;
+      if (($lines_bot + $lines_manual) % 10000 == 0)
+      { print $line ; }
+
+      # if ($lang !~ /^(?:en|ja|de|fr|es|ru|it|pt|pl|nl|)$/)
+      # { $lang = 'xx' ; }
+
+      # languages with a code number of 20 or higher in %langcodes are also counted under 'rest'
+      $edits_per_location {"$lang,$longitude,$latitude"} ++ ;
+      $edits_per_location {"all,$longitude,$latitude"} ++ ;
+      if ($langcodes {$lang} >= 20)
+      { $edits_per_location {"rest,$longitude,$latitude"} ++ ; }
+
+      $edits_per_language {$lang} ++ ;
+      $edits_per_language {"all"} ++ ;
+      if ($langcodes {$lang} >= 20)
+      { $edits_per_language {"rest"} ++ ; }
+    }
+    close ($gz_in) ;
+  }
+  print "Edit files processed: " . (0+$days_processed_edits) . "\n" ;
+
+  return ;
+}
+
+sub ReadViews
+{
+  # Read the gzipped views file of every day of $year/$month:
+  #   $dir_in/yyyy-mm-dd/private/SquidDataViewsVizDoNotPublish-yyyymmdd.gz
+  # Each line is split on commas into time, ip, domain, bot flag, mobile flag, os, client and mime category.
+  # Only lines with a domain matching /wp/, mime category 'page', a bot flag other than 'B'
+  # and an ip known to the GeoIP database ($gi) are counted.
+  #
+  # Accumulated over all days: %locations, %views_per_location, %views_per_language,
+  #                            %platform_per_location, %mobile_os_client, $days_processed_views
+  # Days without a file are reported and skipped. Takes no arguments, returns nothing.
+
+  print "\n\nReadViews\n\n" ;
+
+  my $days = days_in_month ($year, $month) ; # loop invariant
+# for ($day = 1 ; $day <= 3 ; $day++)
+  for ($day = 1 ; $day <= $days ; $day++)
+  {
+    $yyyymmdd = sprintf ("%04d-%02d-%02d", $year, $month, $day) ;
+    ($yyyymmdd2 = $yyyymmdd) =~ s/-//g ;
+    $file = "$dir_in/$yyyymmdd/private/SquidDataViewsVizDoNotPublish-$yyyymmdd2.gz" ;
+    print "\nProcess $file\n\n" ;
+
+    if (! -e $file)
+    { print "File not found!\n" ; next ; }
+
+    $days_processed_views ++ ;
+
+    my $gz_in = IO::Uncompress::Gunzip -> new ($file) or die "gunzip failed: $GunzipError\n";
+    binmode $gz_in ;
+
+    $lines_read = 0 ;
+    while ($line = <$gz_in>)
+    {
+      # for tests:
+      # last if $lines_read > 1000 ;
+
+      chomp $line ;
+      ($time,$ip,$domain,$bot,$mobile,$os,$client,$mimecat) = split (',', $line) ;
+
+      # cheap string filters first, the GeoIP lookup only for lines that survive them
+      next if $domain !~ /wp/ ;
+      next if $mimecat ne "page" ;
+      next if $bot eq 'B' ;
+
+      $rec = $gi -> record_by_name ($ip) ;
+      next if ! defined $rec ;
+
+      $mobile_os_client {"$mobile,$os,$client"} ++ ;
+
+      # time of day is taken from positions 11-18 of the time field, read as hh:mm:ss
+      $time = substr ($time,11,8) ;
+      $hh = substr ($time,0,2) ;
+      $mm = substr ($time,3,2) ;
+      $ss = substr ($time,6,2) ;
+      $min = 60 * $hh + $mm ;
+
+      # language code = lower-cased domain with everything up to the first colon removed
+      $domain =~ s/^.*?:// ;
+      $lang = lc ($domain) ;
+
+      $country_code = $rec->country_code ;
+      $country_code3 = $rec->country_code3 ;
+      $country_name = $rec->country_name ;
+      $region = $rec->region ;
+      $region_name = $rec->region_name ;
+      $city = $rec->city ;
+      $postal_code = $rec->postal_code ;
+      $latitude = $rec->latitude ;
+      $longitude = $rec->longitude ;
+
+      $secs = (24*3600 + $hh * 3600 + $mm * 60 + $ss + int(rand(1200)-600)) % (24*3600) ; # drift +/- 10 minutes
+
+      # next if $secs < 12 * 3600 ; # one hour only for iPod iPhone
+      # next if $secs > 13 * 3600 ; # one hour only for iPod iPhone
+
+      # shifted by 180/90 degrees so that the rounded coordinates are never negative
+      $longitude = sprintf ("%.0f", $longitude * $resolution) + 180 * $resolution ; # resolution 1/$resolution degree
+      $latitude = sprintf ("%.0f", $latitude * $resolution) + 90 * $resolution ; # resolution 1/$resolution degree
+
+      $locations {"$longitude,$latitude"} ++ ;
+
+      # progress indicator: show one parsed record per 10000 counted views
+      $line = "$time,$secs,$min,$lang,$country_code,$country_code3,$country_name,$region,$region_name,$city,$postal_code,$longitude,$latitude\n" ;
+      if ($lines_read++ % 10000 == 0)
+      { print $line ; }
+
+      # if ($lang !~ /^(?:en|ja|de|fr|es|ru|it|pt|pl|nl|)$/)
+      # { $lang = 'xx' ; }
+
+      # languages with a code number of 20 or higher in %langcodes are also counted under 'rest'
+      $views_per_location {"$lang,$longitude,$latitude"} ++ ;
+      $views_per_location {"all,$longitude,$latitude"} ++ ;
+      if ($langcodes {$lang} >= 20)
+      { $views_per_location {"rest,$longitude,$latitude"} ++ ; }
+
+      $views_per_language {$lang} ++ ;
+      $views_per_language {"all"} ++ ;
+      if ($langcodes {$lang} >= 20)
+      { $views_per_language {"rest"} ++ ; }
+
+      $platform_per_location {"$mobile,$longitude,$latitude"} ++ ;
+    }
+    close ($gz_in) ;
+  }
+  print "View files processed: " . (0+$days_processed_views) . "\n" ;
+
+# open CSV_TEST, ">", "test.csv" ;
+# foreach $key (sort {$mobile_os_client {$b} <=> $mobile_os_client {$a}} keys %mobile_os_client)
+# { print CSV_TEST "$key," . $mobile_os_client {$key} . "\n" ; }
+# close CSV_TEST ;
+
+  return ;
+}
+
+sub WriteJs
+{
+  # Turn the data collected by ReadEdits and ReadViews into two JavaScript files in the current directory:
+  #   WikipediaRequestsInit-$yyyymm-$resolution.js  manual edits, edit frequencies, view statistics, language lists
+  #   WikipediaBotEditsInit-$yyyymm.js              bot edits
+  # Frequencies are stored as daily averages (totals divided by the number of days processed).
+  # Dies when an output file cannot be written. Takes no arguments, returns nothing.
+
+  # no file found for a kind of data means all its counts are zero; divide by 1 then instead of by 0
+  my $days_edits = $days_processed_edits || 1 ;
+  my $days_views = $days_processed_views || 1 ;
+
+# @data_edits_manual = sort {$a <=> $b} @data_edits_manual ;
+# $data_edits_manual = join ('', @data_edits_manual) ;
+
+  # manual edits, one group per location: "location|delta,lang;delta,lang^location|.."
+  foreach $latlong (sort keys %latlong_edits_manual)
+  {
+    $coords ++ ;
+    $edits = $latlong_edits_manual {$latlong} ;
+    $edits =~ s/;$// ;
+    @edits = sort {$a <=> $b} split (';', $edits) ; # numeric sort on the leading seconds
+
+    # store each time as the difference with the previous edit at this location
+    $time_prev = 0 ;
+    foreach $edit (@edits)
+    {
+      ($time,$lang) = split (',', $edit) ;
+      $edit = ($time - $time_prev) . ",$lang" ;
+      $time_prev = $time ;
+    }
+    $edits = join (';', @edits) ;
+    $data_edits_manual .= "$latlong|$edits^" ;
+  }
+  $data_edits_manual =~ s/\^$// ;
+
+# at least as long as server side compression doesn't work
+# shrink file significantly further, to be patched back in browser
+  my @lang_letters = ([en => 'E'], [ja => 'J'], [de => 'D'], [fr => 'F'], [es => 'S'],
+                      [ru => 'R'], [it => 'I'], [pt => 'P'], [pl => 'L'], [nl => 'N']) ;
+  foreach my $pair (@lang_letters)
+  {
+    my ($code, $letter) = @$pair ;
+    $data_edits_manual =~ s/,${code};/$letter/g ;
+  }
+
+  foreach $key (sort keys %edits_per_location)
+  {
+    # store daily averages
+    $data_edit_freq .= $key . ',' . sprintf ("%.0f", $edits_per_location {$key} / $days_edits) . ';' ;
+    $lines_frequency++ ;
+  }
+  $data_edit_freq =~ s/;$// ;
+
+  foreach $location (sort keys %locations)
+  {
+    # platform flag '-' is counted as desktop, 'M' as mobile
+    $desktop = (0 + $platform_per_location {"-,$location"}) ;
+    $mobile = (0 + $platform_per_location {"M,$location"}) ;
+
+    $tot_views_desktop += $desktop ;
+    $tot_views_mobile += $mobile ;
+
+    if ($desktop + $mobile >= 10)
+    {
+      $perc_mobile = sprintf ("%.0f", 100 * ($mobile / ($desktop + $mobile))) ;
+      print "$location DESKTOP $desktop MOBILE $mobile PERC $perc_mobile\n" ;
+    }
+
+    # store daily averages
+    $desktop = sprintf ("%.0f", $desktop / $days_views) ;
+    $mobile = sprintf ("%.0f", $mobile / $days_views) ;
+    next if $desktop + $mobile == 0 ;
+
+    $data_view_freq .= "$location,$desktop,$mobile;" ;
+    $lines_viewstats++ ;
+  }
+
+  $tot_views_desktop = sprintf ("%.0f", $tot_views_desktop / $days_views) ;
+  $tot_views_mobile = sprintf ("%.0f", $tot_views_mobile / $days_views) ;
+  my $tot_views = $tot_views_desktop + $tot_views_mobile ;
+  my $perc_mobile_all = $tot_views > 0 ? 100 * $tot_views_mobile / $tot_views : 0 ; # no views at all: report 0.0%
+  print "Views per day: " . $tot_views . "\n" ;
+  print "Percentage mobile: " . sprintf ("%.1f%%", $perc_mobile_all) . "\n" ;
+
+  $data_view_freq =~ s/;$// ;
+
+  # the values are written between single quotes below, so single quotes inside them are replaced
+  # NOTE(review): '&quote;' is not a standard HTML entity - presumably the browser side patches it back, confirm
+  $data_languages_sort_by_code =~ s/'/&quote;/g ;
+  $data_languages_sort_by_edits =~ s/'/&quote;/g ;
+
+  my $file_manual = "WikipediaRequestsInit-$yyyymm-$resolution.js" ;
+  open my $js_manual, '>', $file_manual or die "Could not open '$file_manual' for writing: $!" ;
+  print $js_manual "var resolution = $resolution ;\n" ;
+  print $js_manual "var date_edits = '$date_edits' ;\n" ;
+  print $js_manual "var name_month = '$name_month' ;\n\n" ;
+  print $js_manual "function data_init()\n{\n" ;
+  print $js_manual "data_languages_sort_by_code='$data_languages_sort_by_code';\n\n" ;
+  print $js_manual "data_languages_sort_by_edits='$data_languages_sort_by_edits';\n\n" ;
+  print $js_manual "data_edits='$data_edits_manual';\n\n" ;
+  print $js_manual "data_freq='$data_edit_freq';\n\n" ;
+  print $js_manual "data_viewstats='$data_view_freq';" ;
+  print $js_manual "\n}\n" ;
+  close $js_manual or die "Could not write '$file_manual': $!" ;
+
+  @data_edits_bots = sort {$a <=> $b} @data_edits_bots ; # numeric sort on the leading seconds
+  $data_edits_bots = join ('', @data_edits_bots) ;
+  # NOTE(review): matches only a trailing semicolon + backslash + newline, which the entries built
+  # in ReadEdits never end with - presumably a leftover, the trailing ';' stays in the output
+  $data_edits_bots =~ s/;\\\n$// ;
+
+  my $file_bots = "WikipediaBotEditsInit-$yyyymm.js" ;
+  open my $js_bots, '>', $file_bots or die "Could not open '$file_bots' for writing: $!" ;
+  print $js_bots "var resolution = $resolution ;\n" ;
+  print $js_bots "function init()\n{\ndata_bots='$data_edits_bots';\n}\n" ;
+  close $js_bots or die "Could not write '$file_bots': $!" ;
+
+  # $lines_bot and $lines_manual are counted over all days read
+  print "\nBot events written: $lines_bot\n\n" ;
+  print "\nEdits written: $lines_manual single edits, $lines_frequency points\n\n" ;
+
+  print "Lat/long pairs written: $coords\n\n" ;
+
+  foreach $lang (sort {$edits_per_language {$b} <=> $edits_per_language {$a}} keys %edits_per_language)
+  { print "$lang:" . $edits_per_language {$lang} . "\n" ; }
+
+  return ;
+}
+
+sub ListLanguages
+{
+  # Build the language lists from 'StatisticsMonthly.csv' (current directory).
+  # Each line is read as: language code, date, further fields. The value at index 11 of those further fields
+  # is summed as the edit count over the last 12 lines (months) of each language.
+  # Lines of one language are expected to be adjacent.
+  #
+  # Sets: %edits_per_lang_last_12_months  summed edits per language code
+  #       %langcodes                      rank per language by those edits, 0 = most edits
+  #       $data_languages_sort_by_code    "code:name;code:name;.." ordered by code
+  #       $data_languages_sort_by_edits   same, ordered by edits, most first
+  # Languages without a name in %out_languages are left out of both lists and of %langcodes.
+  # Dies when the file is missing or unreadable. Takes no arguments, returns nothing.
+
+  SetLanguageInfo () ;
+
+  my $file_csv = 'StatisticsMonthly.csv' ;
+  die "Could not open 'StatisticsMonthly.csv'" if ! -e $file_csv ;
+
+  # read once, scan twice (-e alone does not guarantee that the file can be read)
+  open my $csv, '<', $file_csv or die "Could not open '$file_csv': $!" ;
+  my @lines = <$csv> ;
+  close $csv ;
+  chomp @lines ;
+
+  # counts months history per language
+  foreach $line (@lines)
+  {
+    ($lang,$date,@fields) = split (',' , $line) ;
+    $months {$lang} ++ ;
+  }
+
+  # for last 12 months per language accumulate edits
+  $lang_prev = '' ;
+  foreach $line (@lines)
+  {
+    ($lang,$date,@fields) = split (',' , $line) ;
+    $months2 {$lang} ++ ;
+
+    # first line of a new language: store and report the total of the previous one
+    if (($lang ne $lang_prev) && ($lang_prev ne ''))
+    {
+      # $lang_prev is not updated here, so this skips every 'commons' line that follows another language
+      next if $lang eq 'commons' ;
+      $edits_per_lang_last_12_months {$lang_prev} = $edits_prev ;
+      print "$lang_prev,$date_prev,$edits_prev\n" ;
+      $edits_prev = 0 ;
+    }
+    $lang_prev = $lang ;
+    $date_prev = $date ;
+
+    # only the last 12 lines of this language contribute
+    if ($months2 {$lang} > $months {$lang} - 12)
+    { $edits_prev += $fields [11] ; }
+  }
+  # the last language in the file has no successor to trigger the store above
+  $edits_per_lang_last_12_months {$lang_prev} = $edits_prev ;
+  print "$lang_prev,$date_prev,$edits_prev\n" ;
+
+  foreach $lang (sort keys %edits_per_lang_last_12_months)
+  {
+    $lang_name = $out_languages {$lang} ;
+    next if $lang_name eq '' ;
+    $data_languages_sort_by_code .= "$lang:$lang_name;" ;
+  }
+  $data_languages_sort_by_code =~ s/;$// ;
+
+  foreach $lang (sort {$edits_per_lang_last_12_months {$b} <=> $edits_per_lang_last_12_months {$a}} keys %edits_per_lang_last_12_months)
+  {
+    $lang_name = $out_languages {$lang} ;
+
+    next if $lang_name eq '' ;
+    $langcodes {$lang} = $count_languages++ ; # rank, starting at 0
+    # print "$lang: ${langcodes {$lang}}\n" ;
+
+    $data_languages_sort_by_edits .= "$lang:$lang_name;" ;
+  }
+  $data_languages_sort_by_edits =~ s/;$// ;
+
+# print "$data_languages_sort_by_code\n\n" ;
+# print "$data_languages_sort_by_edits\n\n" ;
+
+  return ;
+}
+
+sub SetLanguageInfo
+{
+ # taken from http://meta.wikimedia.org/wiki/List_of_Wikipedias
+ # see also http://www.loc.gov/standards/iso639-2/php/English_list.php
+ # url might have been generated from language code, but there were (and will be?) exceptions
+ # see also http://meta.wikimedia.org/wiki/Special:SiteMatrix
+ # latest language name corrections provided by Mark Williamson
+ # see also http://meta.wikimedia.org/wiki/Languages
+
+ # numbers in square brackets: number of speakers in millions according to
+ # http://en.wikipedia.org/w/index.php?title=List_of_languages_by_number_of_native_speakers&oldid=305926069 (Aug 5 2009)
+ # includes secondary speakers (hence adds up to much more than 6 billion)
+ %wikipedias = (
+# mediawiki=>"http://wikimediafoundation.org Wikimedia",
+ nostalgia=>"http://nostalgia.wikipedia.org Nostalgia",
+ sources=>"http://wikisource.org Multilingual&nbsp;Wikisource",
+ meta=>"http://meta.wikimedia.org Meta-Wiki",
+ beta=>"http://beta.wikiversity.org Beta",
+ species=>"http://species.wikipedia.org Wikispecies",
+ commons=>"http://commons.wikimedia.org Commons",
+ foundation=>"http://wikimediafoundation.org Foundation",
+ strategy=>"http://strategy.wikimedia.org Strategic&nbsp;Planning",
+ outreach=>"http://outreach.wikimedia.org Outreach",
+ incubator=>"http://incubator.wikimedia.org Incubator",
+ usability=>"http://usability.wikimedia.org Usability&nbsp;Initiative",
+ sep11=>"http://sep11.wikipedia.org In&nbsp;Memoriam",
+ nlwikimedia=>"http://nl.wikimedia.org Wikimedia&nbsp;Nederland",
+ plwikimedia=>"http://pl.wikimedia.org Wikimedia&nbsp;Polska",
+ mediawiki=>"http://www.mediawiki.org MediaWiki",
+ dewikiversity=>"http://de.wikiversity.org Wikiversit&auml;t",
+ frwikiversity=>"http://fr.wikiversity.org Wikiversit&auml;t",
+ wikimania2005=>"http://wikimania2005.wikimedia.org Wikimania 2005",
+ wikimania2006=>"http://wikimania2006.wikimedia.org Wikimania 2006",
+ aa=>"http://aa.wikipedia.org Afar [1.4,AF]",
+ ab=>"http://ab.wikipedia.org Abkhazian [0.125,AS]",
+ ace=>"http://ace.wikipedia.org Acehnese [3,AS]",
+ af=>"http://af.wikipedia.org Afrikaans [13,AF]",
+ ak=>"http://ak.wikipedia.org Akan [19,AF]",
+ als=>"http://als.wikipedia.org Alemannic [10,EU]", # was Elsatian
+ am=>"http://am.wikipedia.org Amharic [25,AF]",
+ an=>"http://an.wikipedia.org Aragonese [0.01,EU]",
+ ang=>"http://ang.wikipedia.org Anglo-Saxon [,EU]",
+ ar=>"http://ar.wikipedia.org Arabic [530,AF,AS]",
+ arc=>"http://arc.wikipedia.org Aramaic [2.2,AS]",
+ arz=>"http://arz.wikipedia.org Egyptian Arabic [76,AF]",
+ as=>"http://as.wikipedia.org Assamese [13,AS,I]",
+ ast=>"http://ast.wikipedia.org Asturian [0.275,EU]",
+ av=>"http://av.wikipedia.org Avar [0.06,EU]",
+ ay=>"http://ay.wikipedia.org Aymara [2.2,SA]",
+ az=>"http://az.wikipedia.org Azeri [27,AS]",
+ ba=>"http://ba.wikipedia.org Bashkir [1.9,AS]",
+ bar=>"http://bar.wikipedia.org Bavarian [12,EU]",
+ bat_smg=>"http://bat-smg.wikipedia.org Samogitian [0.5,EU]",
+ "bat-smg"=>"http://bat-smg.wikipedia.org Samogitian",
+ bcl=>"http://bcl.wikipedia.org Central Bicolano [2.5,AS]",
+ be=>"http://be.wikipedia.org Belarusian [6.5,EU]",
+ "be-x-old"=>"http://be-x-old.wikipedia.org Belarusian (Taraškievica) [6.5,EU]",
+ be_x_old=>"http://be-x-old.wikipedia.org Belarusian (Taraškievica) [6.5,EU]",
+ bg=>"http://bg.wikipedia.org Bulgarian [12,EU]",
+ bh=>"http://bh.wikipedia.org Bihari [,AS,I]",
+ bi=>"http://bi.wikipedia.org Bislama [0.2,OC]",
+ bjn=>"http://bjn.wikipedia.org Banjar [3.5,AS]",
+ bm=>"http://bm.wikipedia.org Bambara [6,AF]",
+ bn=>"http://bn.wikipedia.org Bengali [230,AS,I]",
+ bo=>"http://bo.wikipedia.org Tibetan [7,AS]",
+ bpy=>"http://bpy.wikipedia.org Bishnupriya Manipuri [0.45,AS,I]",
+ br=>"http://br.wikipedia.org Breton [0.25,EU]",
+ bs=>"http://bs.wikipedia.org Bosnian [2.7,EU]",
+ bug=>"http://bug.wikipedia.org Buginese [4,AS]",
+ bxr=>"http://bxr.wikipedia.org Buryat [0.4,AS]",
+ ca=>"http://ca.wikipedia.org Catalan [9,EU]",
+ cbk_zam=>"http://cbk-zam.wikipedia.org Chavacano [0.607,AS]",
+ "cbk-zam"=>"http://cbk-zam.wikipedia.org Chavacano",
+ cdo=>"http://cdo.wikipedia.org Min Dong [9.1,AS,C]",
+ ce=>"http://ce.wikipedia.org Chechen [1.33,EU]",
+ ceb=>"http://ceb.wikipedia.org Cebuano [20,AS]",
+ ch=>"http://ch.wikipedia.org Chamorro [0.06,OC]",
+ cho=>"http://cho.wikipedia.org Choctaw [0.0179,NA]", # was Chotaw
+ chr=>"http://chr.wikipedia.org Cherokee [0.018,NA]",
+ chy=>"http://chy.wikipedia.org Cheyenne [0.000712,NA]",
+ ckb=>"http://ckb.wikipedia.org Sorani [4,AS]",
+ co=>"http://co.wikipedia.org Corsican [0.25,EU]",
+ cr=>"http://cr.wikipedia.org Cree [0.117,NA]",
+ crh=>"http://crh.wikipedia.org Crimean Tatar [0.456,EU,AS]",
+ cs=>"http://cs.wikipedia.org Czech [12,EU]",
+ csb=>"http://csb.wikipedia.org Cassubian [0.05,EU]",
+ cu=>"http://cv.wikipedia.org Old Church Slavonic [,EU]",
+ cv=>"http://cv.wikipedia.org Chuvash [1.3,AS]",
+ cy=>"http://cy.wikipedia.org Welsh [0.75,EU]",
+ da=>"http://da.wikipedia.org Danish [6,EU]",
+ de=>"http://de.wikipedia.org German [185,EU]",
+ diq=>"http://diq.wikipedia.org Zazaki [2,AS]",
+ dk=>"http://dk.wikipedia.org Danish [6]",
+ dsb=>"http://dsb.wikipedia.org Lower Sorbian [0.014,EU]",
+ dv=>"http://dv.wikipedia.org Divehi [0.3,AS,I]",
+ dz=>"http://dz.wikipedia.org Dzongkha [0.6,AS,I]",
+ ee=>"http://ee.wikipedia.org Ewe [3.5,AF]",
+ el=>"http://el.wikipedia.org Greek [15,EU]",
+ eml=>"http://eml.wikipedia.org Emilian-Romagnol [2,EU]",
+ en=>"http://en.wikipedia.org English [1500,EU,NA,OC,AS,AF]",
+ eo=>"http://eo.wikipedia.org Esperanto [1.1,AL]",
+ es=>"http://es.wikipedia.org Spanish [500,EU,NA,SA,AS,AF]",
+ et=>"http://et.wikipedia.org Estonian [1.25,EU]",
+ eu=>"http://eu.wikipedia.org Basque [1.06,EU]",
+ ext=>"http://ext.wikipedia.org Extremaduran [0.5,EU]",
+ fa=>"http://fa.wikipedia.org Persian [107,AS]",
+ ff=>"http://ff.wikipedia.org Fulfulde [13,AF]",
+ fi=>"http://fi.wikipedia.org Finnish [6,EU]",
+ "fiu-vro"=>"http://fiu-vro.wikipedia.org Voro [0.07,EU]",
+ fiu_vro=>"http://fiu-vro.wikipedia.org Voro [0.07,EU]",
+ fj=>"http://fj.wikipedia.org Fijian [0.55,OC]",
+ fo=>"http://fo.wikipedia.org Faroese [0.07,EU]", # was Faeroese
+ fr=>"http://fr.wikipedia.org French [200,EU,NA,AF,OC]",
+ frp=>"http://frp.wikipedia.org Arpitan [0.113,EU]",
+ frr=>"http://frr.wikipedia.org North Frisian [0.01,EU]",
+ fur=>"http://fur.wikipedia.org Friulian [0.794,EU]",
+ fy=>"http://fy.wikipedia.org Frisian [0.65,EU]",
+ ga=>"http://ga.wikipedia.org Irish [0.53,EU]",
+ gan=>"http://gan.wikipedia.org Gan [35,AS,C]",
+ gay=>"http://gay.wikipedia.org Gayo",
+ gd=>"http://gdi.wikipedia.org Scots Gaelic [0.07,EU]", # was Scottish Gaelic
+ gl=>"http://gl.wikipedia.org Galician [3.5,EU]", # was Galego
+ glk=>"http://glk.wikipedia.org Gilaki [3.3,AS]",
+ gn=>"http://gn.wikipedia.org Guarani [7,SA]",
+ got=>"http://got.wikipedia.org Gothic [,EU]",
+ gu=>"http://gu.wikipedia.org Gujarati [46,AS,I]",
+ gv=>"http://gv.wikipedia.org Manx [0.0017,EU]", # was Manx Gaelic
+ ha=>"http://ha.wikipedia.org Hausa [39,AF]",
+ hak=>"http://hak.wikipedia.org Hakka [34,AS,C]",
+ haw=>"http://haw.wikipedia.org Hawai'ian [0.027,OC]", # was Hawaiian
+ he=>"http://he.wikipedia.org Hebrew [10,AS]",
+ hi=>"http://hi.wikipedia.org Hindi [550,AS]",
+ hif=>"http://hif.wikipedia.org Fiji Hindi [0.46,OC]",
+ ho=>"http://ho.wikipedia.org Hiri Motu",
+ hr=>"http://hr.wikipedia.org Croatian [6.2,EU]",
+ hsb=>"http://hsb.wikipedia.org Upper Sorbian [0.04,EU]",
+ ht=>"http://ht.wikipedia.org Haitian [12,NA]",
+ hu=>"http://hu.wikipedia.org Hungarian [15,EU]",
+ hy=>"http://hy.wikipedia.org Armenian [5.5,AS]",
+ hz=>"http://hz.wikipedia.org Herero [0.13,AF]",
+ ia=>"http://ia.wikipedia.org Interlingua [,AL]",
+ iba=>"http://iba.wikipedia.org Iban",
+ id=>"http://id.wikipedia.org Indonesian [250,AS]",
+ ie=>"http://ie.wikipedia.org Interlingue [,AL]",
+ ig=>"http://ig.wikipedia.org Igbo [22.5,AF]",
+ ii=>"http://ii.wikipedia.org Yi [2,AS,C]",
+ ik=>"http://ik.wikipedia.org Inupiak [0.0021,NA]",
+ ilo=>"http://ilo.wikipedia.org Ilokano [10,AS]",
+ io=>"http://io.wikipedia.org Ido [,AL]",
+ is=>"http://is.wikipedia.org Icelandic [0.32,EU]",
+ it=>"http://it.wikipedia.org Italian [70,EU]",
+ iu=>"http://iu.wikipedia.org Inuktitut [0.03,NA]",
+ ja=>"http://ja.wikipedia.org Japanese [132,AS]",
+ jbo=>"http://jbo.wikipedia.org Lojban [,AL]",
+ jv=>"http://jv.wikipedia.org Javanese [80,AS]",
+ ka=>"http://ka.wikipedia.org Georgian [4.2,EU]",
+ kaa=>"http://kaa.wikipedia.org Karakalpak [0.41,AS]",
+ kab=>"http://ka.wikipedia.org Kabyle [8,AF]",
+ kaw=>"http://kaw.wikipedia.org Kawi",
+ kg=>"http://kg.wikipedia.org Kongo [7,AF]",
+ ki=>"http://ki.wikipedia.org Kikuyu [5.4,AF]",
+ kj=>"http://kj.wikipedia.org Kuanyama",
+ kk=>"http://kk.wikipedia.org Kazakh [12,AS]",
+ kl=>"http://kl.wikipedia.org Greenlandic [0.05,NA]",
+ km=>"http://km.wikipedia.org Khmer [18.5,AS]", # was Cambodian
+ kn=>"http://kn.wikipedia.org Kannada [47,AS,I]",
+ ko=>"http://ko.wikipedia.org Korean [78,AS]",
+ koi=>"http://koi.wikipedia.org Komi-Permyak [0.094,EU]",
+ kr=>"http://kr.wikipedia.org Kanuri [4,AF]",
+ ks=>"http://ks.wikipedia.org Kashmiri [4.6,AS,I]",
+ ksh=>"http://ksh.wikipedia.org Ripuarian [0.25,EU]",
+ ku=>"http://ku.wikipedia.org Kurdish [26,AS]",
+ kv=>"http://kv.wikipedia.org Komi [0.293,EU]",
+ kw=>"http://kw.wikipedia.org Cornish [0.000245,EU]", # was Kornish
+ ky=>"http://ky.wikipedia.org Kirghiz [5,AS]",
+ la=>"http://la.wikipedia.org Latin [,W]",
+ lad=>"http://lad.wikipedia.org Ladino [0.109,AS]",
+ lb=>"http://lb.wikipedia.org Luxembourgish [0.39,EU]", # was Letzeburgesch
+ lbe=>"http://lbe.wikipedia.org Lak [0.12,AS]",
+ lg=>"http://lg.wikipedia.org Ganda [10,AF]",
+ li=>"http://li.wikipedia.org Limburgish [1.6,EU]",
+ lij=>"http://lij.wikipedia.org Ligurian [1.9,EU]",
+ lmo=>"http://lmo.wikipedia.org Lombard [3,EU]",
+ ln=>"http://ln.wikipedia.org Lingala [25,AF]",
+ lo=>"http://lo.wikipedia.org Laotian [5.2,AS]",
+ ls=>"http://ls.wikipedia.org Latino Sine Flexione",
+ lt=>"http://lt.wikipedia.org Lithuanian [3.5,EU]",
+ lv=>"http://lv.wikipedia.org Latvian [1.6,EU]",
+ mad=>"http://mad.wikipedia.org Madurese [14]",
+ mak=>"http://mak.wikipedia.org Makasar [2]",
+ map_bms=>"http://map-bms.wikipedia.org Banyumasan [13.5,AS]",
+ "map-bms"=>"http://map-bms.wikipedia.org Banyumasan",
+ mdf=>"http://mdf.wikipedia.org Moksha [0.5,EU]",
+ mg=>"http://mg.wikipedia.org Malagasy [20,AF]",
+ mh=>"http://mh.wikipedia.org Marshallese [0.0439,OC]",
+ mhr=>"http://mhr.wikipedia.org Eastern Mari [0.3,EU]",
+ mi=>"http://mi.wikipedia.org Maori [0.157,OC]",
+ min=>"http://min.wikipedia.org Minangkabau [6.5]",
+ minnan=>"http://minnan.wikipedia.org Minnan",
+ mk=>"http://mk.wikipedia.org Macedonian [2.7,EU]",
+ ml=>"http://ml.wikipedia.org Malayalam [37,AS,I]",
+ mn=>"http://mn.wikipedia.org Mongolian [5.2,AS]",
+ mo=>"http://mo.wikipedia.org Moldavian [,EU]",
+ mr=>"http://mr.wikipedia.org Marathi [90,AS,I]",
+ mrj=>"http://mrj.wikipedia.org Western Mari [0.3,A]",
+ ms=>"http://ms.wikipedia.org Malay [300,AS]",
+ mt=>"http://mt.wikipedia.org Maltese [0.37,EU]",
+ mus=>"http://mus.wikipedia.org Muskogee [0.006,NA]",
+ mwl=>"http://mwl.wikipedia.org Mirandese [0.015,EU]",
+ my=>"http://my.wikipedia.org Burmese [52,AS]",
+ myv=>"http://myv.wikipedia.org Erzya [0.5,AS]",
+ mzn=>"http://mzn.wikipedia.org Mazandarani [3.7,AS]",
+ na=>"http://na.wikipedia.org Nauruan [0.006,OC]", # was Nauru
+ nah=>"http://nah.wikipedia.org Nahuatl [1.45,NA]",
+ nap=>"http://nap.wikipedia.org Neapolitan [7.5,EU]",
+ nds=>"http://nds.wikipedia.org Low Saxon [10,EU]",
+ nds_nl=>"http://nds-nl.wikipedia.org Dutch Low Saxon [10,EU]",
+ "nds-nl"=>"http://nds-nl.wikipedia.org Dutch Low Saxon [10,EU]",
+ ne=>"http://ne.wikipedia.org Nepali [30,AS,I]",
+ new=>"http://new.wikipedia.org Nepal Bhasa [0.8,AS,I]",
+ ng=>"http://ng.wikipedia.org Ndonga [0.690,AF]",
+ nl=>"http://nl.wikipedia.org Dutch [27,EU,SA]",
+ nov=>"http://nov.wikipedia.org Novial [,AL]",
+ nrm=>"http://nrm.wikipedia.org Norman [,EU]",
+ nn=>"http://nn.wikipedia.org Nynorsk [4.7,EU]", # was Neo-Norwegian
+ no=>"http://no.wikipedia.org Norwegian [4.7,EU]",
+ nv=>"http://nv.wikipedia.org Navajo [0.178,NA]",
+ ny=>"http://ny.wikipedia.org Chichewa [9.3,AF]",
+ oc=>"http://oc.wikipedia.org Occitan [1.9,EU]",
+ om=>"http://om.wikipedia.org Oromo [25.5,AF]",
+ or=>"http://or.wikipedia.org Oriya [31,AS,I]",
+ os=>"http://os.wikipedia.org Ossetic [0.52,AS]",
+ pa=>"http://pa.wikipedia.org Punjabi [104,AS,I]",
+ pag=>"http://pag.wikipedia.org Pangasinan [1.5,AS]",
+ pam=>"http://pam.wikipedia.org Kapampangan [2.9,AS]",
+ pap=>"http://pap.wikipedia.org Papiamentu [0.329,SA]",
+ pcd=>"http://pcd.wikipedia.org Picard [,EU]",
+ pdc=>"http://pdc.wikipedia.org Pennsylvania German [0.250,NA]",
+ pi=>"http://pi.wikipedia.org Pali [,AS]",
+ pih=>"http://pih.wikipedia.org Norfolk [0.0006,OC]",
+ pl=>"http://pl.wikipedia.org Polish [43,EU]",
+ pms=>"http://pms.wikipedia.org Piedmontese [2,EU]",
+ pnb=>"http://pnb.wikipedia.org Western Panjabi [60,AS]",
+ pnt=>"http://pnt.wikipedia.org Pontic [0.325,EU]",
+ ps=>"http://ps.wikipedia.org Pashto [26,AS]",
+ pt=>"http://pt.wikipedia.org Portuguese [290,EU,SA,AF,AS]",
+ qu=>"http://qu.wikipedia.org Quechua [10.4,SA]",
+ rue=>"http://rue.wikipedia.org Rusyn [0.6,EU]",
+ rm=>"http://rm.wikipedia.org Romansh [0.035,EU]", # was Rhaeto-Romance
+ rmy=>"http://rmy.wikipedia.org Romani [2.5,EU]",
+ rn=>"http://rn.wikipedia.org Kirundi [4.6,AF]",
+ ro=>"http://ro.wikipedia.org Romanian [28,EU]",
+ roa_rup=>"http://roa-rup.wikipedia.org Aromanian [0.3,EU]",
+ "roa-rup"=>"http://roa-rup.wikipedia.org Aromanian [0.5]",
+ roa_tara=>"http://roa-tara.wikipedia.org Tarantino [0.9,EU]",
+ "roa-tara"=>"http://roa-tara.wikipedia.org Tarantino",
+ ru=>"http://ru.wikipedia.org Russian [278,EU,AS]",
+ ru_sib=>"http://ru-sib.wikipedia.org Siberian",
+ "ru-sib"=>"http://ru-sib.wikipedia.org Siberian",
+ rw=>"http://rw.wikipedia.org Kinyarwanda [12,AF]",
+ sa=>"http://sa.wikipedia.org Sanskrit [0.05,AS,I]",
+ sah=>"http://sah.wikipedia.org Sakha [0.456,AS]",
+ sc=>"http://sc.wikipedia.org Sardinian [1.85,EU]",
+ scn=>"http://scn.wikipedia.org Sicilian [8,EU]",
+ sco=>"http://sco.wikipedia.org Scots [1.5,EU]",
+ sd=>"http://sd.wikipedia.org Sindhi [41,AS,I]",
+ se=>"http://se.wikipedia.org Northern Sami [0.02,EU]",
+ sg=>"http://sg.wikipedia.org Sangro [3,AF]",
+ sh=>"http://sh.wikipedia.org Serbo-Croatian [23,EU]",
+ si=>"http://si.wikipedia.org Sinhala [19,AS]",
+ simple=>"http://simple.wikipedia.org Simple English [1500,EU,NA,OC,AS,AF]",
+ sk=>"http://sk.wikipedia.org Slovak [7,EU]",
+ sl=>"http://sl.wikipedia.org Slovene [2.4,EU]",
+ sm=>"http://sm.wikipedia.org Samoan [0.370,OC]",
+ sn=>"http://sn.wikipedia.org Shona [7,AF]",
+ so=>"http://so.wikipedia.org Somali [13.5,AF]",
+ sq=>"http://sq.wikipedia.org Albanian [6,EU]",
+ sr=>"http://sr.wikipedia.org Serbian [12,EU]",
+ srn=>"http://srn.wikipedia.org Sranan [0.3,SA]",
+ ss=>"http://ss.wikipedia.org Siswati [3,AF]",
+ st=>"http://st.wikipedia.org Sesotho [4.9,AF]",
+ stq=>"http://stq.wikipedia.org Saterland Frisian [0.002,EU]",
+ su=>"http://su.wikipedia.org Sundanese [27,AS]",
+ sv=>"http://sv.wikipedia.org Swedish [10,EU]",
+ sw=>"http://sw.wikipedia.org Swahili [50,AF]",
+ szl=>"http://szl.wikipedia.org Silesian [0.056,EU]",
+ ta=>"http://ta.wikipedia.org Tamil [66,AS,I]",
+ te=>"http://te.wikipedia.org Telugu [80,AS,I]",
+ test=>"http://test.wikipedia.org Test",
+ tet=>"http://tet.wikipedia.org Tetum [0.8,AS]",
+ tg=>"http://tg.wikipedia.org Tajik [4.4,AS]",
+ th=>"http://th.wikipedia.org Thai [73,AS]",
+ ti=>"http://ti.wikipedia.org Tigrinya [6.7,AF]",
+ tk=>"http://tk.wikipedia.org Turkmen [9,AS]",
+ tl=>"http://tl.wikipedia.org Tagalog [90,AS]",
+ tlh=>"http://tlh.wikipedia.org Klingon", # was Klignon
+ tn=>"http://tn.wikipedia.org Setswana [4.4,AF]",
+ to=>"http://to.wikipedia.org Tongan [0.105,OC]",
+ tokipona=>"http://tokipona.wikipedia.org Tokipona",
+ tpi=>"http://tpi.wikipedia.org Tok Pisin [5.5,AS]",
+ tr=>"http://tr.wikipedia.org Turkish [70,EU,AS]",
+ ts=>"http://ts.wikipedia.org Tsonga [3.3,AF]",
+ tt=>"http://tt.wikipedia.org Tatar [8,AS]",
+ tum=>"http://tum.wikipedia.org Tumbuka [2,AF]",
+ turn=>"http://turn.wikipedia.org Turnbuka",
+ tw=>"http://tw.wikipedia.org Twi [14.8,AF]",
+ ty=>"http://ty.wikipedia.org Tahitian [0.120,OC]",
+ udm=>"http://udm.wikipedia.org Udmurt [0.550,AS]",
+ ug=>"http://ug.wikipedia.org Uyghur [10,AS,C]",
+ uk=>"http://uk.wikipedia.org Ukrainian [45,EU]",
+ ur=>"http://ur.wikipedia.org Urdu [60,AS,I]",
+ uz=>"http://uz.wikipedia.org Uzbek [23.5,AS]",
+ ve=>"http://ve.wikipedia.org Venda [0.875,AF]",
+ vec=>"http://vec.wikipedia.org Venetian [2.3,EU]",
+ vi=>"http://vi.wikipedia.org Vietnamese [80,AS]",
+ vls=>"http://vls.wikipedia.org West Flemish [1.06,EU]",
+ vo=>"http://vo.wikipedia.org Volap\xFCk [0.000010,AL]",
+ wa=>"http://wa.wikipedia.org Walloon [0.6,EU]",
+ war=>"http://war.wikipedia.org Waray-Waray [3.1,AS]",
+ wo=>"http://wo.wikipedia.org Wolof [3.6,AF]",
+ wuu=>"http://wuu.wikipedia.org Wu [77,AS,C]",
+ xal=>"http://xal.wikipedia.org Kalmyk [0.174,EU]",
+ xh=>"http://xh.wikipedia.org Xhosa [7.9,AF]",
+ yi=>"http://yi.wikipedia.org Yiddish [3.2,W]",
+ yo=>"http://yo.wikipedia.org Yoruba [25,AF]",
+ za=>"http://za.wikipedia.org Zhuang [14,AS,C]",
+ zea=>"http://zea.wikipedia.org Zealandic [0.220,EU]",
+ zh=>"http://zh.wikipedia.org Chinese [1300,AS]",
+ zh_min_nan=>"http://zh-min-nan.wikipedia.org Min Nan [49,AS,C]",
+ "zh-min-nan"=>"http://zh-min-nan.wikipedia.org Min Nan [60]",
+ zh_classical=>"http://zh-classical.wikipedia.org Classical Chinese [,AS,C]",
+ "zh-classical"=>"http://zh-classical.wikipedia.org Classical Chinese [,AS,C]",
+ zh_yue=>"http://zh-yue.wikipedia.org Cantonese [71,AS,C]",
+ "zh-yue"=>"http://zh-yue.wikipedia.org Cantonese [71,AS,C]",
+ zu=>"http://zu.wikipedia.org Zulu [26,AF]",
+ zz=>"&nbsp; All&nbsp;languages",
+ zzz=>"&nbsp; All&nbsp;languages except English"
+ );
+
+ # provide defaults; these may be overridden by the localization file
+ foreach $key (keys %wikipedias)
+ {
+ my $wikipedia = $wikipedias {$key} ;
+ if ($wikipedia =~ /\[.*\]/)
+ {
+ $wikipedia2 = $wikipedia ;
+ $wikipedia2 =~ s/^.*?\[// ;
+ $wikipedia2 =~ s/\].*$// ;
+ ($speakers, $regions) = split (',', $wikipedia2,2) ;
+ @regions = split (',', $regions) ;
+ $out_speakers {$key} = $speakers ;
+
+ if ($speakers > $speakers_max)
+ { $speakers_max = $speakers ; }
+
+ foreach $region (@regions)
+ {
+ if (length ($region) != 2) # land codes China, India
+ { $region = "" ; }
+ }
+ @regions = sort {$a cmp $b} @regions ;
+ $out_regions {$key} = join (',', @regions) ;
+ $regions = join (',', @regions) ;
+ }
+ $wikipedia =~ s/\s*\[..*$// ; # remove speakers
+ $out_urls {$key} = $wikipedia ;
+ $out_languages {$key} = $wikipedia ;
+
+ if (($key !~ /_/) && ($key !~ /(?:nostalgia|sep11|species)/) && ($wikipedia =~ /wikipedia.org/)) # fiu-vro yes, fiu_vro no / also meta, commons etc no
+ {
+ ($key2 = $key) =~ s/"//g ;
+ push @real_languages, $key2 ;
+ }
+
+ $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ;
+ $out_languages {$key} =~ s/^[^\s]+\s+(.*)$/$1/ ;
+ $out_article {$key} = "http://en.wikipedia.org/wiki/" . $out_languages {$key} . "_language" ;
+ $out_article {$key} =~ s/ /_/g ;
+
+ $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ;
+ }
+}
+


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS [at] lists
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Wikipedia mediawiki-cvs RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.