#!/usr/bin/perl 
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# "Doing linear scans over an associative array is like trying to club
#  someone to death with a loaded uzi."  --Larry Wall  (Thanks Larry)
#
# Read a pcap file and generate a data file histogram of payload content,
# suitable to pass to gnuplot or excel with your favorite options.
# Requires the Net::Pcap CPAN module
#
# BUGS:
#   - Assumes little-endian capture
#   - Only deals with IPv4 packets.  Rewrite your capture file to include only
#     IPv4 packets with tcpdump (tcpdump -r oldcap.dump -w newcap.dump "ip")
#   - Does not handle layer 4 packets other than ICMP, UDP and TCP.  You can
#     use a similar filter with tcpdump to extract only these protocols,
#     (tcpdump -r oldcap.dump -w newcap.dump "tcp or udp or icmp")
#
# 2/25/2004 jwright@hasborg.com
# 9/20/2005 Merged patch from Frank Sweetser <fs|at|WPI.EDU> to use an array
# instead of a hash for a considerable performance improvement.  Thanks Frank!

use strict;
use Getopt::Long;
use IO::Handle;

eval {
    require Net::Pcap;
};
if ($@) {
    print <<EOT;

The Net::Pcap module is required for this tool.  Download this tool from the
CPAN website (search.cpan.org), or install via 
"perl -MCPAN -e 'install Net::Pcap'".  You will need libpcap for this module.

Windows users using ActiveState Perl can install this module with the ppm tool.
As of 2/26/2004 this module isn't in the standard PPM repository, but you can
install it from JL Morel's website with
"ppm install http://www.bribes.org/perl/ppm/Net-Pcap.ppd".  You must also 
install the winpcap software from http://winpcap.polito.it/.

EOT
    exit(-1);
}


                 
my ($progname,      $payloadbyte,
    $p,             $err,
    $packet,
    $i,             $unpacket,
    $iphlen,        $ipver,
    $iptotlen,      $iptotlen_dec,
    $ipproto,       $ippayload,
    $payload,       $payloadoffset,
    $packetcount,   $datafilename,
    $capturenoext,  $imagefilename,
    $datalink,      $hoffset,
   );

my @datarray;

$packetcount=0;
$progname = $0;
$progname =~ s,.*/,,;    # only basename left in progname
$progname =~ s,.*\\,,;    # only basename left in progname (windows convention)
$progname =~ s/\.\w*$//; # strip extension if any

$SIG{INT} = \&sigint_handler;

if (@ARGV < 1) {
    print("$progname: Generate a data file histogram of a libpcap file.\n");
    print("\nusage: $progname filename.dump | gnuplot\n\n");
    print("gnuplot will create a histogram called filename.png\n");
    exit(1);
}

# Allow users to specify their own offset on the command-line.
# "undocumented" feature
if (@ARGV == 2) {
    $hoffset = $ARGV[1];
}

if ($p = Net::Pcap::open_offline($ARGV[0], \$err)) {

    if (!$hoffset) {
        $datalink = Net::Pcap::datalink($p);
        # Fake a case block
        CASE: {
            # EN10MB capture files
            ($datalink == 1) && do {
                $hoffset = 14;
                last CASE;
            };

            # Linux cooked socket capture files
            ($datalink == 113) && do {
                $hoffset = 16;
                last CASE;
            };

            # DLT_IEEE802_11 capture files
            ($datalink == 105) && do {
                $hoffset = 32;
                last CASE;
            }
        }
    }

    if (!$hoffset) {
        print("Could not identify the offset to the start of the IP header. Try specifying the\n");
        print("number of bytes to the start of the IP header as the 2nd command-line argument.\n");
        exit(1);
    }

    Net::Pcap::loop($p, -1, \&process_pkt, '');
} else {
    print("Could not open $ARGV[0]: $err\n");
    exit(1);
}

$capturenoext = $ARGV[0];
$capturenoext =~ s,.*/,,;    # only basename left
$capturenoext =~ s,.*\\,,;   # only basename left in (windows convention)
$capturenoext =~ s/\.\w*$//; # strip extension if any
$datafilename = $capturenoext . ".data";
$imagefilename = $capturenoext . ".png";

open(DATAFILE, ">$datafilename");

for my $byte ( 0 .. 255 ){
    print DATAFILE $byte . "\t" . $datarray[$byte] . "\n";
}

close(DATAFILE);

# Set some good options for gnuplot
print "set title \"Packet Payload Histogram for " . $ARGV[0] . "\"\n";
print "set xlabel \"Byte Values\"\n";
print "set ylabel \"Frequency\"\n";
print "set autoscale\n";
print "set terminal png\n";
print "set output \"" . $imagefilename . "\"\n";
print "set yrange [0:*]\n";
print "set xrange [0:255]\n";
print "set format x \"%02x\"\n";
print "set nokey\n";
print "set size 1,0.5\n";
#print "plot \"" . $datafilename . "\" lt 4\n";
print "plot \"" . $datafilename . "\" with points lc 3\n";
print "quit\n";
exit(0);

sub print_pkt {
    my ($packet) = @_;    
    $i=0;
    while ($i < length($packet)) {
        print (substr($packet, $i, 4) . " ");
        $i = $i + 4;
        # mod 32 since we are dealing with ascii values, not hex values
        # (two characters instead of one byte)
        if (($i % 32) == 0) { print "\n"; };
    }
    print "\n\n";
} 

sub process_pkt {
    my ($data, $header, $packet) = @_;
    $unpacket = unpack('H*', substr($packet, $hoffset));

    #print_pkt($unpacket);

    $packetcount++;
    $ipver = substr($unpacket, 0, 1);
    $iphlen = substr($unpacket, 1, 1);
    $iptotlen = substr($unpacket, 4, 4);
    $ipproto = substr($unpacket, 18, 2);

    if (int($ipver) != 4) {
        # Can't handle non-ip packets
        print STDERR "Skipping non-ip packet.\n";
    }

    # $iphlen is in 32-bit words, * 2 since we are in ascii format
    $ippayload = substr($unpacket, (($iphlen * 4) * 2));

    # Calculate the layer 4 protocol payload offset value.  This value will
    # be the offset in bytes.  We'll have to multiple this by 2 later since
    # we are dealing with ascii characters and not hex values.
    if ($ipproto == 1) {        # ICMP
        $payloadoffset = (8);
    } elsif ($ipproto == 6) {   # TCP
        # Get the embedded header length, * 4 to convert from 32-bit words
        $payloadoffset = (substr($ippayload, 24, 1));
        $payloadoffset = hex($payloadoffset);
        $payloadoffset = $payloadoffset * 4;
    } elsif ($ipproto == 11) {  # UDP
        $payloadoffset = (8);
    } else {
        print STDERR "Unknown IP Protocol: $ipproto.  Skipping packet.\n";
        return(0);
    }

    # Make sure there is an upper-layer payload available
    #if (length($ippayload) > ($payloadoffset * 2)) {
    $iptotlen = "0x" . $iptotlen;
    $iptotlen_dec = hex($iptotlen);
    if ($iptotlen_dec > (($iphlen * 4) + ($payloadoffset))) {
        $payload = substr($ippayload, ($payloadoffset * 2));
        #print "Packet number $packetcount:\n";
        #print_pkt($payload);
        
            # Collect each byte value in the payload contents, add to counter in
            # @datarray for the corresponding value
            $i=0;
            while ($i < length($payload)) {
                $payloadbyte = hex(substr($payload, $i, 2));
                #print "$payloadbyte\n";
                $datarray[$payloadbyte]++;
                $i = $i + 2;
            }
    }

    return(0);
}


sub sigint_handler {
     # Close pcap gracefully after CTRL/C
     if ($p) {
         Net::Pcap::close($p);
         print "\n";
     }
     print "Caught CTRL/C, exiting.";
     exit(0);
}
