processlog.pl

`#!/usr/bin/perl

use strict;

use warnings;

use lib qw(/data/perl/lib/perl5/site_perl);

use Getopt::Long;

use POSIX;

use Date::Calc qw(:all);

use XML::Simple;

use Compress::Zlib;

use File::Find;

use Log::Log4perl qw(:easy);

use Net::hostent;

use Socket;

use FileHandle;

use Net::DNS;

use Data::Dumper;
# ---------------------------------------------------------------------------
# Main script
#
# Combines the per-proxy compressed log files of one site into a single
# gzip file and then writes the Yahoo message board report from it.
#
# Command line:  --site=<site key in the XML config>  [--date=YYYY-MM-DD]
# ---------------------------------------------------------------------------

my $site;                  # --site: key into $config->{site}
my $date;                  # --date: optional run date, defaults to today
my $options;               # result of GetOptions()
my @proxyLogFiles = ();    # filled by newFile() while File::Find walks the tree
my %ipFound;               # proxy address => 1 once a log for it was found

# Config file must be in the same directory as this file: called without
# arguments, XMLin() looks for "<script name>.xml" next to the script.
my $config = eval { XMLin() }
  or die( "Could not load XML configuration: " . ( $@ || "no data returned\n" ) );

# Configure logging.
Log::Log4perl->init( $config->{log4perl} );

my $log = Log::Log4perl->get_logger();

# GetOptions() needs references to the destination variables.
$options = GetOptions(
    "site=s" => \$site,
    "date:s" => \$date,
);

defined($site) or usage();

# Initialise %ipFound: no log has been seen yet for any configured address.
foreach my $ipArray ( $config->{site}->{$site}->{address} ) {
    foreach my $ip (@$ipArray) {
        $ipFound{$ip} = 0;
    }
}

# Normalise the run date to "Y-M-D". The offset applied here is 0; newFile()
# is the place that steps back one day when matching log file names.
# NOTE(review): Add_Delta_Days() returns plain integers, so months and days
# below 10 are not zero-padded in $date or in the file names derived from it.
# Confirm that this is intended.
if ( defined($date) ) {
    $date = join( "-", Add_Delta_Days( verifyDateFormat($date), 0 ) );
}
else {
    $date = join( "-", Add_Delta_Days( Today(), 0 ) );
}

runInfo( $site, $date, $config );

my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );

my $combinedLogFileName =
  sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );

# Sets the @proxyLogFiles variable with the list of compressed files to process.
find( \&newFile, $config->{ccelog} );

if ( -e $combinedLogFileName && -s $combinedLogFileName ) {
    $log->info( sprintf( "%s already exists", $combinedLogFileName ) );
}
else {

    # Make sure we have something to do.
    if ( scalar(@proxyLogFiles) > 0 ) {

        # Combine the logs.
        combineLogs( $site, $date, @proxyLogFiles );
    }
    else {
        $log->logdie("No files found");
    }

    # Make sure we record which proxies had no log file.
    foreach my $ip ( keys %ipFound ) {
        if ( !$ipFound{$ip} ) {
            $log->info( sprintf( "Log for %s not found", $ip ) );
        }
    }
}

cleanUp(@proxyLogFiles);

yahooMessageBoards( $site, $date );

exit 0;

# Print the command-line synopsis to STDOUT and terminate the script with
# exit status 1. Never returns.
sub usage {

    # Both messages need a real newline; the escape had been lost, leaving a
    # literal "n" glued to the text (and to the last site name).
    printf( "Usage: %s --site=bmssite [--date=YYYY-MM-DD]\n", $0 );

    printf("... where site is either usevv, ushpw, jptok, ukchs, aumlb\n");

    exit(1);

}

#date needs to be in YYYY-MM-DD format on the command line

# Validate a date given on the command line.
#
# Parameter: $date - string expected in YYYY-MM-DD form.
# Returns:   the list (year, month, day) as split from the string, which is
#            the argument shape the Date::Calc functions take.
# Failure:   calls usage() (which exits) when the string is not shaped like
#            YYYY-MM-DD; logs and dies when the values do not form a valid
#            calendar date according to check_date().
sub verifyDateFormat {

    my ($date) = @_;

    # \d was reduced to a literal "d" in the pattern, so no real date could
    # ever pass this check.
    usage() if ( $date !~ /^\d{4}-\d{2}-\d{2}$/ );

    my @yearMonthDay = split( /-/, $date );

    if ( !check_date(@yearMonthDay) ) {

        $log->logdie("Invalid date range");

    }

    return @yearMonthDay;

}

# File::Find callback. For every directory entry visited, check whether its
# name looks like
#
#     celog_<proxy address>_<YYYYMMDD>_<six digits>.txt.gz
#
# for one of the addresses configured for $site, where the date is the day
# before the run date held in $date. Matching files are appended to
# @proxyLogFiles and the address is flagged in %ipFound.
#
# Reads the file-level $date, $site, $config and $log; takes no arguments
# (File::Find passes the entry name in $_).
sub newFile {

    my $fileName = $_;

    my $fullName = $File::Find::name;

    # Add_Delta_Days() returns integers, so the stamp has to be zero-padded
    # explicitly; a plain join() would turn 2010-03-05 into "201035".
    # (To match the run date itself rather than the previous day, use an
    # offset of 0 here.)
    my $dateYYYYMMDD =
      sprintf( "%04d%02d%02d", Add_Delta_Days( split( /-/, $date ), -1 ) );

    foreach my $ipArray ( $config->{site}->{$site}->{address} ) {

        foreach my $ip (@$ipArray) {

            # \Q..\E keeps the dots of the address literal; the braces stop
            # Perl from reading "$dateYYYYMMDD_" as a variable name.
            if ( $fileName =~ /^celog_\Q$ip\E_${dateYYYYMMDD}_\d{6}\.txt\.gz$/ ) {

                push( @proxyLogFiles, $fullName );

                $log->info( sprintf( "Found %s", $fullName ) );

                $ipFound{$ip} = 1;

            }

        }

    }

}

# Concatenate the given gzip log files into one combined gzip file named
# "<logdir>/<site>.<M-D-Y>.gz".
#
# Parameters: $site     - site key, used in the output file name
#             $date     - run date as "Y-M-D"
#             @fileList - full paths of the gzip files to append, in order
# Returns:    0 when the combined file already exists (nothing is written),
#             1 after all files were appended.
# Side effects: in both cases the input files are removed through cleanUp().
# Failure:    logs and dies when a file cannot be opened or a read/write
#             on a gzip stream reports an error.
#
# NOTE(review): when the sub dies part-way, the partially written combined
# file stays on disk and a later run will take the "already exists" branch.
sub combineLogs {

    my ( $site, $date, @fileList ) = @_;

    my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );

    my $combinedLogFileName =
      sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );

    if ( -e $combinedLogFileName ) {

        $log->info( sprintf( "%s already exists", $combinedLogFileName ) );

        cleanUp(@fileList);

        return 0;

    }

    my $combinedLogFile = eval { gzopen( $combinedLogFileName, 'wb' ); }
      or $log->logdie(
        sprintf( "Could not open %s for writing", $combinedLogFileName ) );

    foreach my $file (@fileList) {

        my $buffer;
        my $bytesread    = 0;
        my $byteswritten = 0;

        # Open each log file and concatenate it to the combined log file.
        my $proxyLogFile = eval { gzopen( $file, 'rb' ) }
          or $log->logdie( sprintf( "Could not open %s for reading", $file ) );

        while ( !$proxyLogFile->gzeof() ) {

            # Check the result of each call on its own. Testing the running
            # total (as "$total += call() or die" does) hides every error
            # after the first line and dies on a merely empty file.
            my $read = $proxyLogFile->gzreadline($buffer);

            $log->logdie( sprintf( "%s", $proxyLogFile->gzerror() ) )
              if ( $read < 0 );

            last if ( $read == 0 );    # end of input

            $bytesread += $read;

            my $written = $combinedLogFile->gzwrite($buffer);

            $log->logdie( sprintf( "%s", $combinedLogFile->gzerror() ) )
              if ( !$written );

            $byteswritten += $written;

        }

        $log->info( sprintf( "Added %s to %s", $file, $combinedLogFileName ) );

        # Want to be sure we did not lose any info.
        $log->debug( sprintf( "%s bytes read from %s\n", $bytesread, $file ) );

        $log->debug(
            sprintf(
                "%s bytes written to %s\n",
                $byteswritten, $combinedLogFileName
            )
        );

        $proxyLogFile->gzclose();

    }

    $combinedLogFile->gzclose();

    cleanUp(@fileList);

    return 1;

}

#what parameters are we running with

sub runInfo {

    # Write the parameters of this run to the log at info level: the run
    # date, three directory settings from the configuration, and one line
    # per address listed for the site.
    #
    #   $site   - site key looked up under $config->{site}
    #   $date   - run date, logged as given
    #   $config - configuration hash reference
    my ( $site, $date, $config ) = @_;

    # Each entry pairs a format string with the single value it reports.
    my @reportLines = (
        [ "DATE = %s",                $date ],
        [ "DOWNLOADDIR = %s",         $config->{logdir} ],
        [ "BINDIR = %s",              $config->{bindir} ],
        [ "CCEDIR = %s",              $config->{ccelog} ],
        [ "Host info for %s proxies", $site ],
    );

    foreach my $reportLine (@reportLines) {
        my ( $format, $value ) = @$reportLine;
        $log->info( sprintf( $format, $value ) );
    }

    foreach my $address ( @{ $config->{site}->{$site}->{address} } ) {
        $log->info( sprintf( "HOST = %s", $address ) );
    }

}

# Remove the given files from disk.
#
# Parameter: @fileList - paths of the files to delete.
# Logging:   "Deleted <file>" at info level for each file actually removed,
#            a warning with the system error for each file that could not
#            be removed. Files that no longer exist are skipped silently,
#            because the same list is passed here more than once per run.
sub cleanUp {

    my (@fileList) = @_;

    foreach my $file (@fileList) {

        next if ( !-e $file );    # already gone, nothing to report

        if ( unlink($file) ) {
            $log->info( sprintf( "Deleted %s", $file ) );
        }
        else {
            $log->warn( sprintf( "Could not delete %s: %s", $file, $! ) );
        }

    }

}

# Write a CSV report of Yahoo message board requests found in the combined
# log of one site and day.
#
# Parameters: $site - site key, used in the input and output file names
#             $date - run date as "Y-M-D"
# Input:      "<logdir>/<site>.<M-D-Y>.gz"; only lines containing both
#             "messages.yahoo.com" and "tid=bmy" are kept.
# Output:     "<reportdir>/yahoo/<site>.<M-D-Y>.csv" with one
#             "timestamp,workstation,url" line per kept log line (fields 0,
#             2 and 6 of the whitespace-split line).
# Failure:    logs and dies when a file cannot be opened, the resolver
#             cannot be created, or reading the gzip stream fails.
sub yahooMessageBoards {

    my ( $site, $date ) = @_;

    my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );

    my $combinedLogFileName =
      sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );

    my $log = Log::Log4perl->get_logger();

    my @messageURL = ();

    my $buffer;
    my $bytesread = 0;

    my $combinedLogFile = eval { gzopen( $combinedLogFileName, 'rb' ); }
      or $log->logdie(
        sprintf( "Could not open %s for reading", $combinedLogFileName ) );

    my $file = sprintf( "%s/yahoo/%s.%s.csv",
        $config->{reportdir}, $site, $dateMM_DD_YYYY );

    my $yahooReport = eval { FileHandle->new( $file, 'w' ); }
      or $log->logdie( sprintf( "Could not open %s for writing", $file ) );

    # Report the eval error; the original passed $$ (the process id).
    my $dns = eval { Net::DNS::Resolver->new }
      or $log->logdie( sprintf( "Could not create DNS resolver: %s", $@ ) );

    while ( !$combinedLogFile->gzeof() ) {

        # Judge each read by its own return value: -1 is an error, 0 is the
        # end of the stream.
        my $read = $combinedLogFile->gzreadline($buffer);

        $log->logdie( sprintf( "%s", $combinedLogFile->gzerror() ) )
          if ( $read < 0 );

        last if ( $read == 0 );

        $bytesread += $read;

        next if ( $buffer !~ /messages\.yahoo\.com/ );

        next if ( $buffer !~ /tid=bmy/ );

        push( @messageURL, $buffer );

    }

    $combinedLogFile->gzclose();
    $log->info(
        sprintf(
            "Read %s MB from %s",
            $bytesread / ( 1024 * 1024 ),
            $combinedLogFileName
        )
    );

    # Today's date, formatted once so that it can be compared as a string.
    my $today = sprintf( "%04d-%02d-%02d", ( Time_to_Date(time) )[ 0, 1, 2 ] );

    foreach my $line (@messageURL) {

        my ( $timestamp, $workstation, $url ) =
          ( split( ' ', $line ) )[ 0, 2, 6 ];

        # Resolve the workstation name only for entries logged the day
        # before today. The whole date is compared; "==" on the two lists
        # would look at the day of the month alone.
        my $dayAfterEntry = sprintf( "%04d-%02d-%02d",
            Add_Delta_Days( ( Time_to_Date($timestamp) )[ 0, 1, 2 ], 1 ) );

        if ( $today eq $dayAfterEntry ) {

            my $query = $dns->search($workstation);

            if ($query) {

                foreach my $entries ( $query->answer ) {

                    # Only PTR records provide ptrdname().
                    next if ( $entries->type ne 'PTR' );

                    $workstation = $entries->ptrdname();

                }

            }

            else {

                $log->logwarn(
                    sprintf( "query failed: %s", $dns->errorstring ) );
            }

        }

        $yahooReport->printf( "%s,%s,%s\n", $timestamp, $workstation, $url );

    }

    $yahooReport->close();
    $log->info( sprintf( "Created report for %s in %s", $date, $file ) );

}

# Scan the combined log of one site and day and tally requests per domain.
#
# Parameters: $site - site key, used in the input file name
#             $date - run date as "Y-M-D"
# Input:      "<logdir>/<site>.<M-D-Y>.gz"; each line is split on
#             whitespace into at most 12 fields, of which 0 (timestamp),
#             4 (bytes), 6 (URL) and 11 (category) are used.
# Output:     echoes every line and its extracted fields to STDOUT.
# Failure:    logs and dies when the file cannot be opened or a read fails.
#
# The per-domain tallies (%category, %urls, %size, $total_bytes, $hits) are
# collected but not reported or returned by this sub.
sub topDomains {

    my ( $site, $date ) = @_;

    my $maxlines = 100;    # TODO: not used yet by the code below

    my $dateMM_DD_YYYY = join( "-", ( split( /-/, $date ) )[ 1, 2, 0 ] );

    my $combinedLogFileName =
      sprintf( "%s/%s.%s.gz", $config->{logdir}, $site, $dateMM_DD_YYYY );

    my $bytesread = 0;

    my $buffer;

    my $log = Log::Log4perl->get_logger();

    my ( %category, %urls, %size );
    my $total_bytes = 0;
    my $hits        = 0;

    my $combinedLogFile = eval { gzopen( $combinedLogFileName, 'rb' ); }
      or $log->logdie(
        sprintf( "Could not open %s for reading", $combinedLogFileName ) );

    while ( !$combinedLogFile->gzeof() ) {

        # -1 signals a read error, 0 the end of the stream.
        my $read = $combinedLogFile->gzreadline($buffer);

        $log->logdie( sprintf( "%s", $combinedLogFile->gzerror() ) )
          if ( $read < 0 );

        last if ( $read == 0 );

        $bytesread += $read;

        chomp($buffer);

        my ( $timestamp, $bytes, $url, $cat ) =
          ( split( ' ', $buffer, 12 ) )[ 0, 4, 6, 11 ];

        # Lines too short to carry a URL cannot be attributed to a domain.
        next if ( !defined($url) );

        # Reduce the URL to its host part. The substitution has to work on
        # a copy of $url; applied to a freshly declared variable it would
        # operate on undef.
        ( my $domain = $url ) =~ s{\w+://([^/]+).*$}{$1};

        # Host names are collapsed to their last two labels; dotted-quad
        # addresses (with or without a port) are kept as they are.
        if ( $domain !~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?::\d+)?$/ ) {
            my @pieces = split( /\./, $domain );
            $domain = sprintf( "%s.%s", $pieces[-2], $pieces[-1] )
              if ( @pieces >= 2 );
        }

        if ( !defined($cat) ) { $cat = 'NONE'; }

        print $buffer, "\n";

        printf( "%s, %s, %s, %s\n", $timestamp, $bytes, $domain, $cat );

        $category{$domain} = $cat;

        $urls{$domain}++;

        $size{$domain} += $bytes;

        $total_bytes += $bytes;

        $hits++;

    }

    # Release the gzip handle; it was left open before.
    $combinedLogFile->gzclose();

    return;

}
`

processlog.pl %name

Husband, father, information security professional and avid photographer living at the junction of Princeton Township, Montgomery Township and Rocky Hill.

Liked this post? Follow this blog to get more. 

Leave a Reply

Your email address will not be published. Required fields are marked *