#!/usr/bin/perl
use strict;
use warnings;
use lib qw(/data/perl/lib/perl5/site_perl);
use Getopt::Long;
use POSIX;
use Date::Calc qw(:all);
use XML::Simple;
use Compress::Zlib;
use File::Find;
use Log::Log4perl qw(:easy);
use Net::hostent;
use Socket;
use FileHandle;
use Net::DNS;
use Data::Dumper;
# ---------------------------------------------------------------------------
# Main script: combine the per-proxy gzip logs for one site and one date into
# a single compressed log, then build the Yahoo message board report from it.
# ---------------------------------------------------------------------------
my $site;
my $date;
my $options;
my @proxyLogFiles = ();
my %ipFound;

#config file must be in same directory as this file
# (XMLin() without arguments looks for "<script name>.xml" next to the script)
my $config = eval { XMLin() }
    or die( sprintf( "Could not load XML configuration: %s\n", $@ || 'empty configuration' ) );

#configure logging
Log::Log4perl->init( $config->{log4perl} );
my $log = Log::Log4perl->get_logger();

# GetOptions needs REFERENCES to the destination variables; passing the
# (undefined) values themselves never stores anything.
$options = GetOptions(
    "site=s" => \$site,
    "date:s" => \$date,
);
( $options && defined($site) ) or usage();

if ( ref( $config->{site} ) ne 'HASH' || !exists $config->{site}->{$site} ) {
    $log->error( sprintf( "Site %s is not defined in the configuration", $site ) );
    usage();
}

# XML::Simple yields a plain scalar when a site has a single <address> and an
# array reference when it has several. Normalise to an array reference in
# place so every later "@$ipArray" dereference works in both cases.
my $siteAddresses = $config->{site}->{$site}->{address};
if ( defined($siteAddresses) && ref($siteAddresses) ne 'ARRAY' ) {
    $siteAddresses = [$siteAddresses];
    $config->{site}->{$site}->{address} = $siteAddresses;
}

#initialise %ipFound
foreach my $ip ( @{ $siteAddresses || [] } ) {
    $ipFound{$ip} = 0;
}

# Normalise the run date to "Y-M-D". No day offset is applied here: the
# "yesterday" offset for the source file names is applied in newFile().
my @runDate = defined($date) ? verifyDateFormat($date) : Today();
$date = join( "-", Add_Delta_Days( @runDate, 0 ) );

runInfo( $site, $date, $config );

my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );
my $combinedLogFileName =
    sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );

#sets the @proxyLogFiles variable with list of compressed files to process
find( \&newFile, $config->{ccelog} );

if ( -e $combinedLogFileName && -s $combinedLogFileName ) {
    $log->info( sprintf( "%s already exists", $combinedLogFileName ) );
}
else {
    #make sure we have something to do
    if ( scalar(@proxyLogFiles) > 0 ) {
        #combine the logs
        combineLogs( $site, $date, @proxyLogFiles );
    }
    else {
        $log->logdie("No files found");
    }

    #making sure we record what logs files were found
    foreach my $ip ( sort keys %ipFound ) {
        if ( !$ipFound{$ip} ) {
            $log->info( sprintf( "Log for %s not found", $ip ) );
        }
    }
}

# Also removes the source files when the combined log already existed.
cleanUp(@proxyLogFiles);
yahooMessageBoards( $site, $date );
exit 0;
# Print the command-line help to STDOUT and terminate the script with exit
# status 1. Never returns.
sub usage {
    printf( "Usage: %s --site=bmssite [--date=YYYY-MM-DD]\n", $0 );
    print("... where site is either usevv, ushpw, jptok, ukchs, aumlb\n");
    exit(1);
}
#date needs to be in yyyy-mm-dd format on the command line
#we also want to verify that the date is correct #and we want to return a format that the Date::Calc functions can use
# Validate a command-line date and split it into its components.
#
# Parameters:
#   $date - date string, expected as YYYY-MM-DD
# Returns:
#   the list (year, month, day) as split from the string, ready to be passed
#   to the Date::Calc functions
# Failure:
#   calls usage() (which exits) when the string is missing or not shaped like
#   YYYY-MM-DD; logdies when the shape is right but check_date() rejects it
sub verifyDateFormat {
    my ($date) = @_;

    # \d must be escaped: a bare "d{4}" only matches the literal letter "d".
    usage() if ( !defined($date) || $date !~ /^\d{4}-\d{2}-\d{2}\z/ );

    my @yearMonthDay = split( /-/, $date );
    if ( !check_date(@yearMonthDay) ) {
        $log->logdie("Invalid date range");
    }
    return @yearMonthDay;
}
# File::Find callback: called once for every entry below the search directory.
# When the entry name matches celog_<ip>_<YYYYMMDD>_<6 digits>.txt.gz for one
# of the configured addresses of $site and the day before $date, the full path
# is appended to @proxyLogFiles and the address is flagged in %ipFound.
#
# Reads the file-level $date, $site and $config; takes no arguments (File::Find
# supplies $_, $File::Find::name).
sub newFile {
    my $fileName = $_;
    my $fullName = $File::Find::name;

    # Zero-pad month and day so the stamp is always 8 digits; a plain join
    # would produce e.g. "200815" for 2008-01-05.
    # NOTE(review): assumes the proxies write a zero-padded date in the file
    # name, as the variable name suggests - confirm against real file names.
    my $dateYYYYMMDD =
        sprintf( "%04d%02d%02d", Add_Delta_Days( split( /-/, $date ), -1 ) );

    # my $dateYYYYMMDD = join( "", split( /-/, $date ) );

    # A site with a single address may be configured as a plain scalar.
    my $addresses = $config->{site}->{$site}->{address};
    my @ipList =
          ref($addresses) eq 'ARRAY' ? @$addresses
        : defined($addresses)        ? ($addresses)
        :                              ();

    foreach my $ip (@ipList) {
        # \Q..\E keeps the dots of the address literal, and the braces stop
        # Perl from reading "$dateYYYYMMDD_d" as a variable name.
        if ( $fileName =~ /^celog_\Q$ip\E_${dateYYYYMMDD}_\d{6}\.txt\.gz$/ ) {
            push( @proxyLogFiles, $fullName );
            $log->info( sprintf( "Found %s", $fullName ) );
            $ipFound{$ip} = 1;
        }
    }
    return;
}
# Concatenate the given gzip log files into the combined log for a site/date.
#
# Parameters:
#   $site     - site key, used in the combined file name
#   $date     - run date as "Y-M-D"
#   @fileList - full paths of the gzip files to append, in order
# Returns:
#   1 when the combined log was written, 0 when a non-empty combined log
#   already existed (nothing is written in that case).
# Side effects:
#   the files in @fileList are removed through cleanUp() in both cases.
# Failure:
#   logdies when a file cannot be opened, read, written or closed.
sub combineLogs {
    my ( $site, $date, @fileList ) = @_;
    my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );
    my $combinedLogFileName =
        sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );

    # Same test as the caller: an empty leftover file does not count as done.
    if ( -e $combinedLogFileName && -s $combinedLogFileName ) {
        $log->info( sprintf( "%s already exists", $combinedLogFileName ) );
        cleanUp(@fileList);
        return 0;
    }

    my $combinedLogFile = gzopen( $combinedLogFileName, 'wb' )
        or $log->logdie(
        sprintf( "Could not open %s for writing", $combinedLogFileName ) );

    foreach my $file (@fileList) {
        my $buffer;
        my $bytesread    = 0;
        my $byteswritten = 0;

        #open each log file and concatenate to combined log file
        my $proxyLogFile = gzopen( $file, 'rb' )
            or $log->logdie( sprintf( "Could not open %s for reading", $file ) );

        while (1) {
            # Test each call's own result: gzreadline returns -1 on error and
            # 0 at end of file. Testing the running total would miss both.
            my $lineBytes = $proxyLogFile->gzreadline($buffer);
            if ( $lineBytes < 0 ) {
                $log->logdie( sprintf( "%s", $proxyLogFile->gzerror() ) );
            }
            last if ( $lineBytes == 0 );
            $bytesread += $lineBytes;

            # gzwrite returns 0 on error.
            my $writtenBytes = $combinedLogFile->gzwrite($buffer);
            if ( !$writtenBytes ) {
                $log->logdie( sprintf( "%s", $combinedLogFile->gzerror() ) );
            }
            $byteswritten += $writtenBytes;
        }

        $log->info( sprintf( "Added %s to %s", $file, $combinedLogFileName ) );

        #want to be sure we did not lose any info
        $log->debug( sprintf( "%s bytes read from %s\n", $bytesread, $file ) );
        $log->debug(
            sprintf(
                "%s bytes written to %s\n",
                $byteswritten, $combinedLogFileName
            )
        );
        $proxyLogFile->gzclose();
    }

    # gzclose flushes pending data and returns 0 on success; do not delete the
    # source files unless the combined log was completely written.
    if ( $combinedLogFile->gzclose() ) {
        $log->logdie( sprintf( "Could not close %s", $combinedLogFileName ) );
    }

    cleanUp(@fileList);
    return 1;
}
#what parameters are we running with
# Write the parameters of this run to the log at info level: the date, the
# logdir/bindir/ccelog configuration entries and every address configured
# for the site.
#
# Parameters:
#   $site   - site key looked up under $config->{site}
#   $date   - run date, logged as given
#   $config - configuration hash reference
sub runInfo {
    my ( $site, $date, $config ) = @_;

    # Each entry pairs a log format with the single value it reports.
    my @reportLines = (
        [ "DATE = %s",                $date ],
        [ "DOWNLOADDIR = %s",         $config->{logdir} ],
        [ "BINDIR = %s",              $config->{bindir} ],
        [ "CCEDIR = %s",              $config->{ccelog} ],
        [ "Host info for %s proxies", $site ],
    );
    foreach my $entry (@reportLines) {
        my ( $format, $value ) = @$entry;
        $log->info( sprintf( $format, $value ) );
    }

    foreach my $host ( @{ $config->{site}->{$site}->{address} } ) {
        $log->info( sprintf( "HOST = %s", $host ) );
    }
}
# Delete the given files and log the outcome of each deletion.
#
# Parameters:
#   @fileList - paths of the files to remove
#
# Files that no longer exist are skipped: the script calls this sub more than
# once with the same list. A failed unlink is logged as a warning instead of
# being reported as "Deleted".
sub cleanUp {
    my (@fileList) = @_;
    foreach my $file (@fileList) {
        next if ( !-e $file );
        if ( unlink($file) ) {
            $log->info( sprintf( "Deleted %s", $file ) );
        }
        else {
            $log->logwarn( sprintf( "Could not delete %s: %s", $file, $! ) );
        }
    }
    return;
}
# Build the Yahoo message board report for a site/date.
#
# Scans the combined log for lines that mention messages.yahoo.com and
# tid=bmy and writes "timestamp,workstation,url" (whitespace-separated fields
# 0, 2 and 6 of each line) to <reportdir>/yahoo/<site>.<MM-DD-YYYY>.csv.
# For entries whose timestamp falls on the day before today the workstation
# address is replaced by its PTR name when the DNS lookup succeeds.
#
# Parameters:
#   $site - site key, used in the file names
#   $date - run date as "Y-M-D"
# Failure:
#   logdies when the combined log cannot be opened or read, when the report
#   cannot be opened or closed, or when no resolver can be created.
sub yahooMessageBoards {
    my ( $site, $date ) = @_;
    my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );
    my $combinedLogFileName =
        sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );
    my $log        = Log::Log4perl->get_logger();
    my @messageURL = ();
    my $buffer;
    my $bytesread = 0;

    my $combinedLogFile = gzopen( $combinedLogFileName, 'rb' )
        or $log->logdie(
        sprintf( "Could not open %s for reading", $combinedLogFileName ) );

    my $file = sprintf( "%s/yahoo/%s.%s.csv",
        $config->{reportdir}, $site, $dateMM_DD_YYYY );
    my $yahooReport = FileHandle->new( $file, 'w' )
        or $log->logdie(
        sprintf( "Could not open %s for writing: %s", $file, $! ) );

    my $dns = eval { Net::DNS::Resolver->new }
        or $log->logdie( sprintf( "Could not create DNS resolver: %s", $@ ) );

    while (1) {
        # gzreadline returns -1 on error and 0 at end of file; test the
        # result of each call rather than the running total.
        my $lineBytes = $combinedLogFile->gzreadline($buffer);
        if ( $lineBytes < 0 ) {
            $log->logdie( sprintf( "%s", $combinedLogFile->gzerror() ) );
        }
        last if ( $lineBytes == 0 );
        $bytesread += $lineBytes;

        next if ( $buffer !~ /messages\.yahoo\.com/ );
        next if ( $buffer !~ /tid=bmy/ );
        push( @messageURL, $buffer );
    }
    $combinedLogFile->gzclose();
    $log->info(
        sprintf(
            "Read %s MB from %s",
            $bytesread / ( 1024 * 1024 ),
            $combinedLogFileName
        )
    );

    my $today = join( "-", ( Time_to_Date(time) )[ 0, 1, 2 ] );
    my %hostNameFor;    # one DNS query per distinct workstation

    foreach my $line (@messageURL) {
        my ( $timestamp, $workstation, $url ) =
            ( split( ' ', $line ) )[ 0, 2, 6 ];
        if ( !defined($url) ) {
            $log->logwarn( sprintf( "Skipping malformed line: %s", $line ) );
            next;
        }

        # Compare the whole (year, month, day) triple. "==" between two lists
        # only compares their last elements, i.e. the day of the month.
        my $entryIsFromYesterday = 0;
        if ( $timestamp =~ /^(\d+)/ ) {
            my $dayAfterEntry = join( "-",
                Add_Delta_Days( ( Time_to_Date($1) )[ 0, 1, 2 ], 1 ) );
            $entryIsFromYesterday = ( $dayAfterEntry eq $today );
        }

        if ($entryIsFromYesterday) {
            if ( !exists $hostNameFor{$workstation} ) {
                my $resolved = $workstation;
                my $query    = $dns->search($workstation);
                if ($query) {
                    foreach my $entries ( $query->answer ) {
                        # ptrdname() only exists on PTR records.
                        next if ( $entries->type ne 'PTR' );
                        $resolved = $entries->ptrdname();
                    }
                }
                else {
                    $log->logwarn(
                        sprintf( "query failed: %s", $dns->errorstring ) );
                }
                $hostNameFor{$workstation} = $resolved;
            }
            $workstation = $hostNameFor{$workstation};
        }

        $yahooReport->printf( "%s,%s,%s\n", $timestamp, $workstation, $url );
    }

    $yahooReport->close()
        or $log->logdie( sprintf( "Could not close %s: %s", $file, $! ) );
    $log->info( sprintf( "Created report for %s in %s", $date, $file ) );
    return;
}
# Tally hits and bytes per domain from the combined log of a site/date.
#
# For every log line the whitespace-separated fields 0 (timestamp), 4 (bytes),
# 6 (url) and 11 (category, the remainder of the line) are used. The url is
# reduced to its host part, and host names are further reduced to their last
# two labels; dotted-quad addresses are kept whole. Each line and its
# extracted fields are printed to STDOUT.
#
# Parameters:
#   $site - site key, used in the combined file name
#   $date - run date as "Y-M-D"
# Returns:
#   a hash reference with the keys category, urls and size (hash references
#   keyed by domain) plus total_bytes and hits.
# Failure:
#   logdies when the combined log cannot be opened or read.
sub topDomains {
    my ( $site, $date ) = @_;
    my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );
    my $combinedLogFileName =
        sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );
    my $bytesread = 0;
    my $buffer;
    my $log = Log::Log4perl->get_logger();
    my ( %category, %urls, %size );
    my $total_bytes = 0;
    my $hits        = 0;

    my $combinedLogFile = gzopen( $combinedLogFileName, 'rb' )
        or $log->logdie(
        sprintf( "Could not open %s for reading", $combinedLogFileName ) );

    while (1) {
        # gzreadline returns -1 on error and 0 at end of file.
        my $lineBytes = $combinedLogFile->gzreadline($buffer);
        if ( $lineBytes < 0 ) {
            $log->logdie( sprintf( "%s", $combinedLogFile->gzerror() ) );
        }
        last if ( $lineBytes == 0 );
        $bytesread += $lineBytes;

        chomp($buffer);
        my ( $timestamp, $bytes, $url, $cat ) =
            ( split( ' ', $buffer, 12 ) )[ 0, 4, 6, 11 ];
        next if ( !defined($url) );    # too few fields to be a log entry

        # Work on a copy of the url. Alternate delimiters avoid having to
        # escape the slashes inside the pattern.
        ( my $domain = $url ) =~ s{\w+://([^/]+).*$}{$1};

        if ( $domain !~ /\A\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?::\d+)?\z/ ) {
            my @pieces = split( /\./, $domain );
            if ( @pieces >= 2 ) {
                $domain = sprintf( "%s.%s", $pieces[-2], $pieces[-1] );
            }
        }

        if ( !defined($cat) ) { $cat = 'NONE'; }

        # Count a missing or non-numeric size field as zero bytes.
        if ( !defined($bytes) || $bytes !~ /^\d+$/ ) { $bytes = 0; }

        print $buffer, "\n";
        printf( "%s, %s, %s, %s\n", $timestamp, $bytes, $domain, $cat );

        $category{$domain} = $cat;
        $urls{$domain}++;
        $size{$domain} += $bytes;
        $total_bytes   += $bytes;
        $hits++;
    }
    $combinedLogFile->gzclose();

    return {
        category    => \%category,
        urls        => \%urls,
        size        => \%size,
        total_bytes => $total_bytes,
        hits        => $hits,
    };
}
# Linking back: another attempted pingback, to hopefully generate a pingback there. That article links here, so a pingback should appear below.