[sourcecode language="perl"]

#!/usr/bin/perl

use strict;

use warnings;

use lib qw(/data/perl/lib/perl5/site_perl);

use Getopt::Long;

use POSIX;

use Date::Calc qw(:all);

use XML::Simple;

use Compress::Zlib;

use File::Find;

use Log::Log4perl qw(:easy);

use Net::hostent;

use Socket;

use FileHandle;

use Net::DNS;

use Data::Dumper;

# ---------------------------------------------------------------------------
# Main script.
#
# Collects the compressed proxy logs found under $config->{ccelog} for one
# site, concatenates them into a single gzip file under $config->{logdir},
# removes the per-proxy source files and finally writes the Yahoo message
# board report from the combined log.
# ---------------------------------------------------------------------------

my $site;
my $date;
my $options;
my @proxyLogFiles = ();
my %ipFound;

# config file must be in same directory as this file: XMLin() without
# arguments looks for an .xml file named after the script.
my $config = eval { XMLin() }
  or die( sprintf( "Could not load configuration: %s\n", $@ || 'no data' ) );

# configure logging
Log::Log4perl->init( $config->{log4perl} );
my $log = Log::Log4perl->get_logger();

# GetOptions needs references so it can store the parsed values.
$options = GetOptions( "site=s" => \$site, "date:s" => \$date );

( $options && defined($site) )
  or usage();

# initialise %ipFound: every proxy address configured for the site starts
# out as "no log seen yet"; newFile() flips the flag when a log turns up.
foreach my $ipArray ( $config->{site}->{$site}->{address} ) {
    foreach my $ip (@$ipArray) {
        $ipFound{$ip} = 0;
    }
}

# The run date is either the validated --date value or today. newFile()
# looks for proxy logs stamped with the day before this date.
if ( defined($date) ) {
    $date = join( "-", Add_Delta_Days( verifyDateFormat($date), 0 ) );
}
else {
    $date = join( "-", Add_Delta_Days( Today(), 0 ) );
}

runInfo( $site, $date, $config );

my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );
my $combinedLogFileName =
  sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );

# sets the @proxyLogFiles variable with list of compressed files to process
find( \&newFile, $config->{ccelog} );

if ( -e $combinedLogFileName && -s $combinedLogFileName ) {
    $log->info( sprintf( "%s already exists", $combinedLogFileName ) );
}
else {

    # make sure we have something to do
    if ( scalar(@proxyLogFiles) > 0 ) {

        # combine the logs
        combineLogs( $site, $date, @proxyLogFiles );
    }
    else {
        $log->logdie("No files found");
    }

    # making sure we record what logs files were found
    foreach my $ip ( keys %ipFound ) {
        if ( !$ipFound{$ip} ) {
            $log->info( sprintf( "Log for %s not found", $ip ) );
        }
    }
}

cleanUp(@proxyLogFiles);

yahooMessageBoards( $site, $date );

exit 0;

# Print the command-line synopsis on STDOUT and terminate the script with
# exit status 1. Never returns.
sub usage {
    printf( "Usage: %s --site=bmssite [--date=YYYY-MM-DD]\n", $0 );
    printf("... where site is either usevv, ushpw, jptok, ukchs, aumlb\n");
    exit(1);
}

# date needs to be in YYYY-MM-DD format on the command line

# we also want to verify that the date is correct
# and we want to return a format that the Date::Calc functions can use

# Validate a date given on the command line.
#
# Parameters:
#   $date - string expected in YYYY-MM-DD form
# Returns:
#   the list (year, month, day) as split from the string
# Side effects:
#   calls usage() (which exits) when the string is not YYYY-MM-DD, and
#   logdie()s when Date::Calc's check_date() rejects the components.
sub verifyDateFormat {
    my ($date) = @_;

    usage() if ( $date !~ /\A\d{4}-\d{2}-\d{2}\z/ );

    my @ymd = split( /-/, $date );

    if ( !check_date(@ymd) ) {
        $log->logdie("Invalid date range");
    }

    return @ymd;
}

# File::Find callback: called once per entry below $config->{ccelog}.
#
# Records every file named celog_<ip>_<YYYYMMDD>_<6 digits>.txt.gz, where
# <ip> is one of the addresses configured for the current site and
# <YYYYMMDD> is the day before the run date ($date). Matching files are
# appended to @proxyLogFiles and the address is flagged in %ipFound.
#
# Reads the file-level $date, $site, $config and $log.
sub newFile {
    my $fileName = $_;
    my $fullName = $File::Find::name;

    # Zero-pad the components: Add_Delta_Days returns plain numbers, and a
    # bare join would turn 2008-03-04 into the ambiguous "200834".
    # assumes the proxy file names carry an 8-digit YYYYMMDD stamp - TODO confirm
    my $dateYYYYMMDD =
      sprintf( "%04d%02d%02d", Add_Delta_Days( split( /-/, $date ), -1 ) );

    foreach my $ipArray ( $config->{site}->{$site}->{address} ) {
        foreach my $ip (@$ipArray) {

            # \Q..\E keeps the dots of the address literal; the braces stop
            # the following "_" from being read as part of a variable name.
            if ( $fileName =~
                /\Acelog_\Q$ip\E_${dateYYYYMMDD}_\d{6}\.txt\.gz\z/ )
            {
                push( @proxyLogFiles, $fullName );
                $log->info( sprintf( "Found %s", $fullName ) );
                $ipFound{$ip} = 1;
            }
        }
    }

    return;
}

# Concatenate the given gzip-compressed proxy logs into one gzip file
# "<logdir>/<site>.<M-D-YYYY>.gz" and delete the source files afterwards.
#
# Parameters:
#   $site     - site key, used in the output file name
#   $date     - run date as "YYYY-M-D"
#   @fileList - full paths of the compressed logs to combine
# Returns:
#   1 when the combined file was written,
#   0 when it already existed (the source files are still deleted).
# Dies (via logdie) when a file cannot be opened, read, written or closed.
sub combineLogs {
    my ( $site, $date, @fileList ) = @_;

    my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );
    my $combinedLogFileName =
      sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );

    if ( -e $combinedLogFileName ) {
        $log->info( sprintf( "%s already exists", $combinedLogFileName ) );
        cleanUp(@fileList);
        return 0;
    }

    my $combinedLogFile = eval { gzopen( $combinedLogFileName, 'wb' ); }
      or $log->logdie(
        sprintf( "Could not open %s for writing", $combinedLogFileName ) );

    foreach my $file (@fileList) {
        my $buffer;
        my $bytesread    = 0;
        my $byteswritten = 0;

        my $proxyLogFile = eval { gzopen( $file, 'rb' ) }
          or $log->logdie( sprintf( "Could not open %s for reading", $file ) );

        # open each log file and concatenate to combined log file
        while ( !$proxyLogFile->gzeof() ) {

            # Check the result of this call, not the running total, so a
            # failure after the first line is still noticed.
            my $read = $proxyLogFile->gzreadline($buffer);
            if ( $read <= 0 ) {
                last if $proxyLogFile->gzeof();    # e.g. an empty log
                $log->logdie( sprintf( "%s", $proxyLogFile->gzerror() ) );
            }
            $bytesread += $read;

            my $written = $combinedLogFile->gzwrite($buffer);
            if ( !defined($written) || $written <= 0 ) {
                $log->logdie( sprintf( "%s", $combinedLogFile->gzerror() ) );
            }
            $byteswritten += $written;
        }

        $log->info( sprintf( "Added %s to %s", $file, $combinedLogFileName ) );

        # want to be sure we did not lose any info
        $log->debug( sprintf( "%s bytes read from %s\n", $bytesread, $file ) );
        $log->debug(
            sprintf(
                "%s bytes written to %s\n",
                $byteswritten, $combinedLogFileName
            )
        );

        $proxyLogFile->gzclose();
    }

    # gzclose flushes pending data and returns 0 on success; do not delete
    # the source logs unless the combined file was written completely.
    if ( $combinedLogFile->gzclose() != 0 ) {
        $log->logdie(
            sprintf( "Could not close %s cleanly", $combinedLogFileName ) );
    }

    cleanUp(@fileList);

    return 1;
}

# what parameters are we running with

# Log the parameters of this run at info level.
#
# Parameters:
#   $site   - site key whose proxy addresses are listed
#   $date   - run date string
#   $config - configuration hash; logdir, bindir, ccelog and
#             site->{$site}->{address} are read from it
sub runInfo {
    my ( $site, $date, $config ) = @_;

    $log->info( sprintf( "DATE = %s",        $date ) );
    $log->info( sprintf( "DOWNLOADDIR = %s", $config->{logdir} ) );
    $log->info( sprintf( "BINDIR = %s",      $config->{bindir} ) );
    $log->info( sprintf( "CCEDIR = %s",      $config->{ccelog} ) );
    $log->info( sprintf( "Host info for %s proxies", $site ) );

    foreach my $ipArray ( $config->{site}->{$site}->{address} ) {
        foreach my $ip (@$ipArray) {
            $log->info( sprintf( "HOST = %s", $ip ) );
        }
    }

    return;
}

# Delete the given files and log the outcome for each one.
#
# Parameters:
#   @fileList - paths of the files to remove
#
# Files that no longer exist are skipped silently; the script calls this
# twice with the same list (from combineLogs and from the main flow).
sub cleanUp {
    my (@fileList) = @_;

    foreach my $file (@fileList) {
        next if !-e $file;

        if ( unlink($file) ) {
            $log->info( sprintf( "Deleted %s", $file ) );
        }
        else {
            $log->warn( sprintf( "Could not delete %s: %s", $file, $! ) );
        }
    }

    return;
}

# Write a CSV report of the Yahoo message board requests found in the
# combined log for a site and date.
#
# Every log line containing both "messages.yahoo.com" and "tid=bmy" yields
# one "timestamp,workstation,url" row in
# "<reportdir>/yahoo/<site>.<M-D-YYYY>.csv". The fields are taken from
# whitespace-separated columns 0, 2 and 6 of the log line.
#
# Parameters:
#   $site - site key, used in the input and output file names
#   $date - run date as "YYYY-M-D"
# Dies (via logdie) when the combined log or the report cannot be opened
# or the log cannot be read.
sub yahooMessageBoards {
    my ( $site, $date ) = @_;

    my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );
    my $combinedLogFileName =
      sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );

    my $log        = Log::Log4perl->get_logger();
    my @messageURL = ();
    my $buffer;
    my $bytesread = 0;

    my $combinedLogFile = eval { gzopen( $combinedLogFileName, 'rb' ); }
      or $log->logdie(
        sprintf( "Could not open %s for reading", $combinedLogFileName ) );

    my $file = sprintf( "%s/yahoo/%s.%s.csv",
        $config->{reportdir}, $site, $dateMM_DD_YYYY );

    my $yahooReport = eval { FileHandle->new( $file, 'w' ); }
      or $log->logdie( sprintf( "Could not open %s for writing", $file ) );

    my $dns = eval { Net::DNS::Resolver->new() }
      or $log->logdie(
        sprintf( "Could not create DNS resolver: %s", $@ || 'unknown error' ) );

    while ( !$combinedLogFile->gzeof() ) {

        # Check this call's result rather than the running total.
        my $read = $combinedLogFile->gzreadline($buffer);
        if ( $read <= 0 ) {
            last if $combinedLogFile->gzeof();    # e.g. an empty log
            $log->logdie( sprintf( "%s", $combinedLogFile->gzerror() ) );
        }
        $bytesread += $read;

        next if ( $buffer !~ /messages\.yahoo\.com/ );
        next if ( $buffer !~ /tid=bmy/ );

        push( @messageURL, $buffer );
    }

    $combinedLogFile->gzclose();

    $log->info(
        sprintf(
            "Read %s MB from %s",
            $bytesread / ( 1024 * 1024 ),
            $combinedLogFileName
        )
    );

    my @today = ( Time_to_Date(time) )[ 0, 1, 2 ];

    foreach my $line (@messageURL) {
        my ( $timestamp, $workstation, $url ) =
          ( split( ' ', $line ) )[ 0, 2, 6 ];

        # Only resolve the address when the entry is exactly one day old.
        # Delta_Days compares year, month and day; "==" on two lists would
        # compare a single element only.
        my @logDate = ( Time_to_Date($timestamp) )[ 0, 1, 2 ];

        if ( Delta_Days( @logDate, @today ) == 1 ) {
            my $query = $dns->search($workstation);

            if ($query) {
                foreach my $entries ( $query->answer ) {

                    # only PTR records provide ptrdname()
                    next if ( $entries->type ne 'PTR' );
                    $workstation = $entries->ptrdname();
                }
            }
            else {
                $log->logwarn(
                    sprintf( "query failed: %s", $dns->errorstring ) );
            }
        }

        $yahooReport->printf( "%s,%s,%s\n", $timestamp, $workstation, $url );
    }

    $yahooReport->close();

    $log->info( sprintf( "Created report for %s in %s", $date, $file ) );

    return;
}

# Read the combined log for a site and date and tally requests per domain.
#
# For each log line the timestamp, byte count, URL and category are taken
# from whitespace-separated columns 0, 4, 6 and 11. The URL is reduced to
# its host part, and host names (not dotted-quad addresses) are reduced
# further to their last two labels. The raw line and the extracted fields
# are printed to STDOUT.
#
# NOTE(review): the per-domain totals and $maxlines are collected but not
# reported anywhere, and no call to this sub is visible in this script -
# it looks unfinished.
#
# Parameters:
#   $site - site key, used in the input file name
#   $date - run date as "YYYY-M-D"
# Dies (via logdie) when the combined log cannot be opened or read.
sub topDomains {
    my ( $site, $date ) = @_;

    my $maxlines = 100;

    my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );
    my $combinedLogFileName =
      sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );

    my $bytesread = 0;
    my $buffer;
    my $log = Log::Log4perl->get_logger();

    my ( %category, %urls, %size );
    my $total_bytes = 0;
    my $hits        = 0;

    my $combinedLogFile = eval { gzopen( $combinedLogFileName, 'rb' ); }
      or $log->logdie(
        sprintf( "Could not open %s for reading", $combinedLogFileName ) );

    while ( !$combinedLogFile->gzeof() ) {

        # Check this call's result rather than the running total.
        my $read = $combinedLogFile->gzreadline($buffer);
        if ( $read <= 0 ) {
            last if $combinedLogFile->gzeof();    # e.g. an empty log
            $log->logdie( sprintf( "%s", $combinedLogFile->gzerror() ) );
        }
        $bytesread += $read;

        chomp($buffer);

        my ( $timestamp, $bytes, $url, $cat ) =
          ( split( ' ', $buffer, 12 ) )[ 0, 4, 6, 11 ];

        # blank or truncated lines carry no URL to account for
        next if ( !defined($url) );

        # Copy the URL first, then strip scheme and path from the copy.
        ( my $domain = $url ) =~ s{\w+://([^/]+).*$}{$1};

        if ( $domain !~ /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/ ) {
            my @pieces = split( /\./, $domain );

            # single-label hosts are kept as they are
            if ( scalar(@pieces) >= 2 ) {
                $domain = sprintf( "%s.%s", $pieces[-2], $pieces[-1] );
            }
        }

        if ( !defined($cat) ) {
            $cat = 'NONE';
        }

        print $buffer, "\n";
        printf( "%s, %s, %s, %s\n", $timestamp, $bytes, $domain, $cat );

        $category{$domain} = $cat;
        $urls{$domain}++;
        $size{$domain} += $bytes;
        $total_bytes += $bytes;
        $hits++;
    }

    $combinedLogFile->gzclose();

    return;
}

[/sourcecode]