Sophie

Sophie

distrib > Mandriva > 9.1 > i586 > by-pkgid > 1efb6ea4098ccdb5a3db9c5346925414 > files > 9

mod_log_mysql-1.15-6mdk.i586.rpm

#!/usr/bin/perl

# $Id: make_combined_log.pl,v 1.4 2002/01/02 20:47:44 helios Exp $
#
# make_combined_log.pl
#
# Usage: make_combined_log <days> <virtual host>
#
# This perl script extracts the httpd access data from a MySQL database
# and formats it properly for parsing by 3rd-party log analysis tools.
#
# The script is intended to be run out by cron.  Its commandline arguments tell
# it how many days' worth of access records to extract, and which virtual_host
# you are interested in (because many people log several virthosts to one MySQL
# db.) This permits you to run it daily, weekly, every 9 days -- whatever you
# decide.
#
# Note: By "days" I mean "chunks of 24 hours prior to the moment this script is
# run." So if you run it at 4:34 p.m. on the 12th, it will go back through 4:34
# p.m. on the 11th.
#
# Known issues:
# * Because GET and POST are not discriminated in the MySQL log, we'll just
#   assume that all requests are GETs.  This should have negligible effect
#   on any analysis software.  This could be remedied IF you stored the full
#   HTTP request in your database instead of just the URI, but that's going to
#   cost you a LOT of space really quickly...
#
# * Because this is somewhat of a quick hack it doesn't do the most robust
#   error checking in the world.  Run it by hand to confirm your usage before
#   putting it in crontab.

$| = 1;

use DBI;

# Remember, $#ARGV is parameters minus one...
if ($#ARGV != 1) {
	die "Incorrect usage, please read the perl source code for correct usage."
}

$days     = $ARGV[0];
$virthost = $ARGV[1];

#
# Set up the proper variables to permit database access
#
$serverName = "your.dbhost.com";
$serverPort = "3306";
$serverUser = "someuser";
$serverPass = "somepass";
$serverTbl  = "acc_log_tbl";
$serverDb   = "apache";

#
# Other constants
#
$st_tz = "-0800";
$dt_tz = "-0700";

$now = time();
$start = $now - (86400 * $days);

#
# Connect and fetch the records
#
$dbh = DBI->connect("DBI:mysql:database=$serverDb;host=$serverName;port=$serverPort",$serverUser,$serverPass);
if (not $dbh) {
	die "Unable to connect to the database.  Please check your connection variables. (Bad password? Incorrect perms?)";
}

$records = $dbh->prepare("select remote_host,remote_user,request_uri,time_stamp,status,bytes_sent,referer,agent,request_method,request_protocol from $serverTbl where virtual_host='$virthost' and time_stamp >= $start");
$records->execute;
if (not $records) {
	die "No such table or the select returned no records."
}

#Right
#ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
#ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"

#Bad
#ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
#ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"


#
# Pull out the data row by row and format it
#
while (@data = $records->fetchrow_array) {
	($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($data[3]);
	$year=$year+1900;

	# Create format for leading-zero formatting
	if ($mday < 10) { $mday = "0$mday"; }
	if ($mon < 10) { $mon = "0$mon"; }	
	if ($hour < 10) { $hour = "0$hour"; }
	if ($min < 10) { $min = "0$min"; }
	if ($sec < 10) { $sec = "0$sec"; }
	
	# Convert numeric month to string month
    for ($mon) {
		if    (/00/)  { $mon = "Jan";} 
		elsif (/01/)  { $mon = "Feb";} 
		elsif (/02/)  { $mon = "Mar";} 
		elsif (/03/)  { $mon = "Apr";}
		elsif (/04/)  { $mon = "May";}
		elsif (/05/)  { $mon = "Jun";}
		elsif (/06/)  { $mon = "Jul";}
		elsif (/07/)  { $mon = "Aug";}
		elsif (/08/)  { $mon = "Sep";}
		elsif (/09/)  { $mon = "Oct";}
		elsif (/10/)  { $mon = "Nov";}
		elsif (/11/)  { $mon = "Dec";}
    }
    
    # Create the output
	print "$data[0] $data[1] - [$mday/$mon/$year:$hour:$min:$sec ";
	if ($isdst) {
		print "$dt_tz\] ";
	} else {
		print "$st_tz\] ";
	}
	print "\"$data[8] $data[2] $data[9]\" $data[4] $data[5] \"$data[6]\" \"$data[7]\"\n";
}

#
# Done
#
$records->finish;