processlog.pl


#!/usr/bin/perl
#
# processlog.pl
#
# Combines the compressed proxy logs found for one site into a single gzip
# file, deletes the per-proxy logs, and then writes a CSV report of the
# Yahoo message board requests found in the combined log.
#
# Usage: processlog.pl --site=SITE [--date=YYYY-MM-DD]

use strict;
use warnings;

use lib qw(/data/perl/lib/perl5/site_perl);

use Getopt::Long;
use POSIX;
use Date::Calc qw(:all);
use XML::Simple;
use Compress::Zlib;
use File::Find;
use Log::Log4perl qw(:easy);
use Net::hostent;
use Socket;
use FileHandle;
use Net::DNS;
use Data::Dumper;

my $site;
my $date;
my @proxyLogFiles = ();
my %ipFound;

#config file must be in same directory as this file
#(XMLin() is called without a file name, so XML::Simple picks its default)
my $config = eval { XMLin() }
    or die( sprintf( "Could not load configuration: %s\n", $@ || 'no data' ) );

#configure logging
Log::Log4perl->init( $config->{log4perl} );
my $log = Log::Log4perl->get_logger();

#GetOptions needs references to store into; a parse error shows the usage
GetOptions(
    "site=s" => \$site,
    "date:s" => \$date,
) or usage();
defined($site) or usage();

#initialise %ipFound
foreach my $ip ( siteAddresses( $config, $site ) ) {
    $ipFound{$ip} = 0;
}

#$date is the run date (today unless --date is given); newFile() looks for
#proxy logs stamped the day before it. Add_Delta_Days(..., 0) is kept so the
#date parts stay unpadded, exactly as in existing combined log file names.
if ( defined($date) ) {
    $date = join( "-", Add_Delta_Days( verifyDateFormat($date), 0 ) );
}
else {
    $date = join( "-", Add_Delta_Days( Today(), 0 ) );
}

runInfo( $site, $date, $config );

my $combinedLogFileName = combinedLogName( $site, $date );

#sets the @proxyLogFiles variable with list of compressed files to process
find( \&newFile, $config->{ccelog} );

if ( -e $combinedLogFileName && -s $combinedLogFileName ) {
    $log->info( sprintf( "%s already exists", $combinedLogFileName ) );
}
else {

    #make sure we have something to do
    if ( scalar(@proxyLogFiles) > 0 ) {

        #combine the logs
        combineLogs( $site, $date, @proxyLogFiles );
    }
    else {
        $log->logdie("No files found");
    }

    #making sure we record what logs files were found
    foreach my $ip ( keys %ipFound ) {
        if ( !$ipFound{$ip} ) {
            $log->info( sprintf( "Log for %s not found", $ip ) );
        }
    }
}

cleanUp(@proxyLogFiles);
yahooMessageBoards( $site, $date );

exit 0;

# Prints the command line help and exits with status 1.
sub usage {
    printf( "Usage: %s --site=bmssite [--date=YYYY-MM-DD]\n", $0 );
    print("... where site is either usevv, ushpw, jptok, ukchs, aumlbn\n");
    exit(1);
}

# Returns the list of proxy addresses configured for $site.
# XML::Simple yields a plain scalar for a single <address> and an array
# reference for several; both are flattened to a list here. An unknown site
# gives an empty list.
sub siteAddresses {
    my ( $config, $site ) = @_;

    my $address = $config->{site}->{$site}->{address};
    return () if !defined($address);
    return ref($address) eq 'ARRAY' ? @{$address} : ($address);
}

# Re-orders a "Y-M-D" date string into the "M-D-Y" form used in file names.
sub dateAsMMDDYYYY {
    my ($date) = @_;
    return join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );
}

# Returns the path of the combined log file for $site on $date:
# <logdir>/<site>.<M-D-Y>.gz
sub combinedLogName {
    my ( $site, $date ) = @_;
    return sprintf( "%s/%s.%s.gz",
        $config->{logdir}, $site, dateAsMMDDYYYY($date) );
}

# Reads one line from the gzip handle $gz into the scalar behind $bufferRef.
# Returns the number of bytes read (0 at end of file) and dies, via the
# logger, when gzreadline reports an error (-1).
sub readLogLine {
    my ( $gz, $bufferRef ) = @_;

    my $read = $gz->gzreadline( ${$bufferRef} );
    if ( $read < 0 ) {
        $log->logdie( sprintf( "%s", $gz->gzerror() ) );
    }
    return $read;
}

#date needs to be in YYYY-MM-DD format on the command line
#we also want to verify that the date is correct
#and we want to return a format that the Date::Calc functions can use,
#i.e. the list (year, month, day)
sub verifyDateFormat {
    my ($date) = @_;

    usage() if ( $date !~ /\A\d{4}-\d{2}-\d{2}\z/ );

    my @ymd = split( /-/, $date );
    if ( !check_date(@ymd) ) {
        $log->logdie("Invalid date range");
    }
    return @ymd;
}

# File::Find callback. Records every file named
# celog_<ip>_<YYYYMMDD>_<6 digits>.txt.gz where <ip> is one of the site's
# proxy addresses and <YYYYMMDD> is the day before the run date. Matches are
# pushed onto @proxyLogFiles and flagged in %ipFound.
sub newFile {
    my $fileName = $_;
    my $fullName = $File::Find::name;

    #zero-padded so that e.g. 2008-5-3 becomes 20080503, not 200853
    #NOTE(review): assumes the log file stamps are zero-padded - confirm
    my $dateYYYYMMDD = sprintf( "%04d%02d%02d",
        Add_Delta_Days( split( /-/, $date ), -1 ) );

    foreach my $ip ( siteAddresses( $config, $site ) ) {

        #\Q..\E keeps the dots of the address literal; the braces stop
        #perl from reading "$dateYYYYMMDD_" as a different variable name
        if ( $fileName =~ /\Acelog_\Q$ip\E_${dateYYYYMMDD}_\d{6}\.txt\.gz\z/ ) {
            push( @proxyLogFiles, $fullName );
            $log->info( sprintf( "Found %s", $fullName ) );
            $ipFound{$ip} = 1;
        }
    }
    return;
}

# Concatenates the gzip files in @fileList into the combined log for $site
# and $date, then deletes the source files.
# Returns 0 when a non-empty combined log already exists (the sources are
# still deleted), 1 after a successful combine. Dies via the logger on any
# open, read, write or close failure, leaving the source files in place.
sub combineLogs {
    my ( $site, $date, @fileList ) = @_;

    my $combinedLogFileName = combinedLogName( $site, $date );

    #same test as the main flow: an empty leftover file is rebuilt
    if ( -e $combinedLogFileName && -s $combinedLogFileName ) {
        $log->info( sprintf( "%s already exists", $combinedLogFileName ) );
        cleanUp(@fileList);
        return 0;
    }

    my $combinedLogFile = eval { gzopen( $combinedLogFileName, 'wb' ); }
        or $log->logdie(
        sprintf( "Could not open %s for writing", $combinedLogFileName ) );

    foreach my $file (@fileList) {
        my $buffer;
        my $bytesread    = 0;
        my $byteswritten = 0;

        my $proxyLogFile = eval { gzopen( $file, 'rb' ) }
            or $log->logdie(
            sprintf( "Could not open %s for reading", $file ) );

        #open each log file and concatenate to combined log file
        while ( ( my $read = readLogLine( $proxyLogFile, \$buffer ) ) > 0 ) {
            $bytesread += $read;

            #gzwrite returns 0 on error
            my $written = $combinedLogFile->gzwrite($buffer)
                or $log->logdie(
                sprintf( "%s", $combinedLogFile->gzerror() ) );
            $byteswritten += $written;
        }

        $log->info(
            sprintf( "Added %s to %s", $file, $combinedLogFileName ) );

        #want to be sure we did not lose any info
        $log->debug( sprintf( "%s bytes read from %s\n", $bytesread, $file ) );
        $log->debug(
            sprintf( "%s bytes written to %s\n",
                $byteswritten, $combinedLogFileName )
        );

        $proxyLogFile->gzclose();
    }

    #gzclose flushes pending data and returns 0 on success; do not delete
    #the source logs unless the combined file was completely written
    if ( $combinedLogFile->gzclose() != 0 ) {
        $log->logdie(
            sprintf( "Could not close %s", $combinedLogFileName ) );
    }

    cleanUp(@fileList);
    return 1;
}

#what parameters are we running with
sub runInfo {
    my ( $site, $date, $config ) = @_;

    $log->info( sprintf( "DATE = %s",        $date ) );
    $log->info( sprintf( "DOWNLOADDIR = %s", $config->{logdir} ) );
    $log->info( sprintf( "BINDIR = %s",      $config->{bindir} ) );
    $log->info( sprintf( "CCEDIR = %s",      $config->{ccelog} ) );
    $log->info( sprintf( "Host info for %s proxies", $site ) );

    foreach my $ip ( siteAddresses( $config, $site ) ) {
        $log->info( sprintf( "HOST = %s", $ip ) );
    }
    return;
}

# Deletes every file in @fileList. Files that no longer exist are skipped
# silently (combineLogs and the main flow both call this with the same
# list); a failed delete is logged as a warning instead of as "Deleted".
sub cleanUp {
    my (@fileList) = @_;

    foreach my $file (@fileList) {
        next if !-e $file;

        if ( unlink($file) ) {
            $log->info( sprintf( "Deleted %s", $file ) );
        }
        else {
            $log->logwarn(
                sprintf( "Could not delete %s: %s", $file, $! ) );
        }
    }
    return;
}

# Writes <reportdir>/yahoo/<site>.<M-D-Y>.csv with one
# "timestamp,workstation,url" row for every combined-log line that mentions
# messages.yahoo.com and tid=bmy. For entries logged the day before today
# the workstation address is replaced by its PTR name when DNS has one.
sub yahooMessageBoards {
    my ( $site, $date ) = @_;

    my $combinedLogFileName = combinedLogName( $site, $date );
    my @messageURL          = ();
    my $buffer;
    my $bytesread = 0;

    my $combinedLogFile = eval { gzopen( $combinedLogFileName, 'rb' ); }
        or $log->logdie(
        sprintf( "Could not open %s for reading", $combinedLogFileName ) );

    my $file = sprintf( "%s/yahoo/%s.%s.csv",
        $config->{reportdir}, $site, dateAsMMDDYYYY($date) );

    my $yahooReport = eval { FileHandle->new( $file, 'w' ); }
        or $log->logdie( sprintf( "Could not open %s for writing", $file ) );

    #report the eval error ($@), not the process id ($$)
    my $dns = eval { Net::DNS::Resolver->new() }
        or $log->logdie($@);

    while ( ( my $read = readLogLine( $combinedLogFile, \$buffer ) ) > 0 ) {
        $bytesread += $read;
        next if ( $buffer !~ /messages\.yahoo\.com/ );
        next if ( $buffer !~ /tid=bmy/ );
        push( @messageURL, $buffer );
    }
    $combinedLogFile->gzclose();

    $log->info(
        sprintf( "Read %s MB from %s",
            $bytesread / ( 1024 * 1024 ),
            $combinedLogFileName )
    );

    #loop invariant: today's date in the form Date::Calc gives back
    my $today = join( "-", ( Time_to_Date(time) )[ 0, 1, 2 ] );

    foreach my $line (@messageURL) {
        my ( $timestamp, $workstation, $url ) =
            ( split( ' ', $line ) )[ 0, 2, 6 ];

        #skip lines too short to carry all three fields
        next if !defined($url);

        #NOTE(review): assumes field 0 is an epoch time - confirm
        #Whole dates are compared as strings; comparing the two lists
        #with == only compared their last elements (the day of month).
        my $dayAfterEntry = join( "-",
            Add_Delta_Days( ( Time_to_Date($timestamp) )[ 0, 1, 2 ], 1 ) );

        if ( $today eq $dayAfterEntry ) {
            my $query = $dns->search($workstation);
            if ($query) {
                foreach my $entries ( $query->answer ) {

                    #only PTR records have a ptrdname method
                    next if $entries->type ne 'PTR';
                    $workstation = $entries->ptrdname();
                }
            }
            else {
                $log->logwarn(
                    sprintf( "query failed: %s", $dns->errorstring ) );
            }
        }

        $yahooReport->printf( "%s,%s,%s\n", $timestamp, $workstation, $url );
    }

    $yahooReport->close()
        or $log->logdie( sprintf( "Could not close %s: %s", $file, $! ) );

    $log->info( sprintf( "Created report for %s in %s", $date, $file ) );
    return;
}

# Reads the combined log for $site and $date and prints, for every line,
# the raw line followed by "timestamp, bytes, domain, category". Host names
# are reduced to their last two labels; IP addresses are kept whole.
# NOTE(review): the per-domain totals gathered here are not reported
# anywhere yet and nothing in this file calls this sub.
sub topDomains {
    my ( $site, $date ) = @_;

    my $combinedLogFileName = combinedLogName( $site, $date );
    my $buffer;
    my ( %category, %urls, %size );
    my $total_bytes = 0;
    my $hits        = 0;

    my $combinedLogFile = eval { gzopen( $combinedLogFileName, 'rb' ); }
        or $log->logdie(
        sprintf( "Could not open %s for reading", $combinedLogFileName ) );

    while ( readLogLine( $combinedLogFile, \$buffer ) > 0 ) {
        chomp($buffer);

        my ( $timestamp, $bytes, $url, $cat ) =
            ( split( ' ', $buffer, 12 ) )[ 0, 4, 6, 11 ];

        #skip lines too short to carry a URL
        next if !defined($url);

        #strip the scheme and the path, leaving the host part of the URL
        ( my $domain = $url ) =~ s{\w+://([^/]+).*$}{$1};

        if ( $domain !~ /\A\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?::\d+)?\z/ ) {
            my @pieces = split( /\./, $domain );
            if ( @pieces >= 2 ) {
                $domain = sprintf( "%s.%s", $pieces[-2], $pieces[-1] );
            }
        }

        if ( !defined($cat) ) {
            $cat = 'NONE';
        }

        print $buffer, "\n";
        printf( "%s, %s, %s, %s\n", $timestamp, $bytes, $domain, $cat );

        #a non-numeric size field (e.g. "-") counts as zero bytes
        my $byteCount = ( $bytes =~ /\A\d+\z/ ) ? $bytes : 0;

        $category{$domain} = $cat;
        $urls{$domain}++;
        $size{$domain} += $byteCount;
        $total_bytes += $byteCount;
        $hits++;
    }

    $combinedLogFile->gzclose();
    return;
}

Linking back to "Another attempted pingback" to hopefully generate a pingback there. That article links here, so a pingback should appear below.

Author:Khürt Williams

A human who works in information security and enjoys photography, Formula 1 and craft ale.