#!/usr/bin/perl

use strict;

require HTTP::Request;
require HTTP::Response;
require HTTP::Parser;
use Digest::MD5 qw(md5 md5_hex);
use Encode;
use Sys::Syslog;


use File::LibMagic ':complete';



# ---------------------------------------------------------------------------
# File-level state shared by the stream-processing loop below.
# ---------------------------------------------------------------------------

# Paths of the two halves of the connection currently being processed
# (client->server and server->client), and the same names with any leading
# directories removed.
my ($to_server_filename, $to_client_filename);
my ($to_server_filename_basename, $to_client_filename_basename);

# Scratch variables for line-oriented reading.
my ($filename, $line_number, $line);

# Per-stream counters of completed requests and responses.
my ($request_num, $response_num) = (0, 0);

# One HTTP::Parser per direction, the status code returned by the most recent
# add() call on each, and any bytes that followed a completed message.
my ($request_parser, $response_parser);
my ($request_status, $response_status);
my ($request_leftover_data, $response_leftover_data);

# Fields of the per-transaction summary line sent to syslog.
my ($transaction_id, $transaction_summary);
my ($payload_filetype, $payload_filetype_escaped);
my ($payload_length, $payload_magic_type);
my ($request_method, $request_host, $request_resource);
my ($response_content_length, $response_content_encoding, $response_transfer_encoding);

# Payload handling.  When $collect_payload is true the dumped payload file is
# kept and its path is printed on stdout; otherwise it is deleted once it has
# been measured and hashed.
my $collect_payload  = 1;
my $payload_root_dir = "/dev/shm/payloads";
my ($payload_filename, $payload, $payload_hash);

# When true, both stream files are deleted after they have been processed
# (including when they could not be opened or parsed).
my $unlink_streams = 1;

# libmagic handle.  The only magic_file()/magic_buffer() calls in this script
# are commented out, so the handle is currently just opened and later closed.
# NOTE(review): an empty path presumably selects the default magic database --
# confirm against the File::LibMagic documentation.
my $magic_handle = magic_open(0);
magic_load($magic_handle, "");

# Transaction summaries are logged under ident "http" on facility local1.
openlog("http", "ndelay,pid", "local1");


# ---------------------------------------------------------------------------
# Main driver.
#
# Each input line names a to-client stream file.  Names matching /\ds\d/ are
# skipped; for every other name the partner to-server file name is derived by
# rewriting the first digit-'c'-digit sequence to digit-'s'-digit.  Both files
# are then parsed as HTTP, each completed request is paired with the next
# completed response, the response payload is dumped under $payload_root_dir,
# and one summary line per transaction is sent to syslog.
#
# Whatever happens to a stream (open failure, parse error, normal completion)
# its files are closed and, when $unlink_streams is set, deleted.
# ---------------------------------------------------------------------------
STREAM: while (defined(my $stream_name = <>))
{
    chomp($stream_name);

    $to_client_filename = $stream_name;
    next STREAM if $to_client_filename =~ m/\ds\d/;

    ($to_server_filename = $to_client_filename) =~ s/(\d)c(\d)/$1s$2/;

    # File names arrive on stdin, so always use three-argument open: with the
    # two-argument form a name such as "cmd |" or ">file" would be interpreted
    # as a mode instead of a path.
    my ($request_fh, $response_fh);
    if (! open($request_fh, '<', $to_server_filename))
    {
        warn("couldn't open to server stream: $to_server_filename");
    }
    elsif (! open($response_fh, '<', $to_client_filename))
    {
        warn("couldn't open to client stream: $to_client_filename");
        close($request_fh);
    }
    else
    {
        _process_stream_pair($request_fh, $response_fh);
        close($response_fh);
        close($request_fh);
    }

    if ($unlink_streams)
    {
        unlink($to_server_filename);
        unlink($to_client_filename);
    }
}

magic_close($magic_handle);


# Parse one request/response stream pair.
#
# Parameters: open read handles for the to-server and to-client stream files.
# Uses and updates the file-level parser, counter and summary variables.
# Returns nothing; on a parse error a warning is emitted and the rest of the
# stream pair is abandoned (the caller does the close/unlink cleanup).
sub _process_stream_pair
{
    my ($request_fh, $response_fh) = @_;

    $line_number = 0;
    $request_num = 0;
    $response_num = 0;
    $request_leftover_data = "";
    $response_leftover_data = "";
    $request_parser = HTTP::Parser->new(request => 1);

    # Strip leading directories.  rindex() returns -1 when the name contains
    # no '/', which makes the offset 0 and keeps the whole name.
    $to_server_filename_basename = substr($to_server_filename, rindex($to_server_filename, '/') + 1);
    $to_client_filename_basename = substr($to_client_filename, rindex($to_client_filename, '/') + 1);

    REQUEST: while (defined(my $request_line = <$request_fh>))
    {
        if (! eval { $request_status = $request_parser->add($request_line); 1 })
        {
            warn("Error parsing request in $to_server_filename");
            return;
        }

        # A status of 0 means the request is complete; anything else means the
        # parser wants more input.
        next REQUEST if $request_status != 0;

        $request_num++;
        $request_method = $request_parser->request()->method();
        $transaction_id = $to_server_filename_basename."_http-".$request_num;

        # Bytes that followed the completed request belong to the next one.
        $request_leftover_data = $request_parser->extra() ? $request_parser->data() : "";

        return if ! _consume_response($response_fh);

        # Start a fresh parser for the next request, seeded with the leftover.
        # add() can die on malformed input, so guard it like every other add().
        $request_parser = HTTP::Parser->new(request => 1);
        if ($request_leftover_data)
        {
            if (! eval { $request_parser->add($request_leftover_data); 1 })
            {
                warn("Error parsing request in $to_server_filename");
                return;
            }
        }
    }

    return;
}


# Read the to-client stream until the next complete response and log the
# transaction it forms with the request currently held in $request_parser.
#
# Parameter: open read handle for the to-client stream file.
# Returns 1 normally (including when the stream ends before a response
# completes, in which case nothing is logged) and 0 after a parse error.
sub _consume_response
{
    my ($response_fh) = @_;

    $response_parser = HTTP::Parser->new(response => 1);
    if ($response_leftover_data)
    {
        if (! eval { $response_status = $response_parser->add($response_leftover_data); 1 })
        {
            warn("Error parsing response in $to_client_filename");
            return 0;
        }
    }

    RESPONSE: while (defined(my $response_line = <$response_fh>))
    {
        if (! eval { $response_status = $response_parser->add($response_line); 1 })
        {
            warn("Error parsing response in $to_client_filename");
            return 0;
        }
        next RESPONSE if $response_status != 0;

        _log_transaction();
        last RESPONSE;
    }

    return 1;
}


# Build and emit the syslog summary for the request in $request_parser and the
# just-completed response in $response_parser, dumping the payload on the way.
# Also records any bytes that followed the response in $response_leftover_data.
sub _log_transaction
{
    # Treat all data here as bytes rather than characters.
    use bytes;

    my $request  = $request_parser->request();
    my $response = $response_parser->object();

    $response_num++;

    # header() is context sensitive; assigning to a scalar first forces the
    # single-string form even when the header is absent or repeated.
    my $content_type      = $response->header("Content-Type");
    my $content_length    = $response->header("Content-Length");
    my $content_encoding  = $response->header("Content-Encoding");
    my $transfer_encoding = $response->header("Transfer-Encoding");

    $payload_filetype = _leading_token(lc($content_type), qr/[a-z0-9\/\.-]+/);
    ($payload_filetype_escaped = $payload_filetype) =~ tr/\//\=/;

    # charset => 'none' undoes the Content-Encoding (gzip etc.) but leaves the
    # body as raw bytes.  Without it text bodies are decoded to characters and
    # the file written below (and therefore its size and MD5) would no longer
    # match the bytes the server actually sent.
    $payload = $response->decoded_content(charset => 'none');
    _store_payload();

    $response_content_length    = _leading_token($content_length, qr/[0-9]+/);
    $response_content_encoding  = _leading_token(lc($content_encoding), qr/[a-z0-9,-]+/);
    $response_transfer_encoding = _leading_token(lc($transfer_encoding), qr/[a-z]+/);

    $request_host = $request->header("host");
    $request_host = "-" if ! $request_host;

    # The summary is space delimited, so every whitespace character in the
    # resource has to be replaced, not just the first one.
    $request_resource = "".$request->uri();
    $request_resource =~ s/\s/_/g;

    $transaction_summary = join(" ",
        $transaction_id,
        $request_method,
        $request_host,
        substr($request_resource, 0, 700),
        $response->code(),
        $payload_length,
        $payload_hash,
        $response_content_length,
        $payload_filetype,
        $response_content_encoding,
        $response_transfer_encoding,
    );
    syslog("info", "%s", $transaction_summary);

    # Bytes that followed the completed response belong to the next one.
    $response_leftover_data = $response_parser->extra() ? $response_parser->data() : "";

    return;
}


# Reduce a header value to its leading token.
#
# Parameters: the raw header value (may be undef) and a pattern describing the
# characters allowed in the token.
# Returns the token when the value starts (after optional whitespace) with
# one, the value unchanged when it does not, and "-" when the value is
# missing or blank.
sub _leading_token
{
    my ($value, $token_re) = @_;
    use bytes;

    $value = "" if ! defined($value);
    $value =~ s/^\s*($token_re).*$/$1/;

    return ($value =~ m/^\s*$/) ? "-" : $value;
}


# Write $payload to "$payload_root_dir/<transaction id>-payload" and set
# $payload_length and $payload_hash from the file.
#
# An empty or undefined payload yields length 0 and hash "-" without touching
# the filesystem.  If the file cannot be written, a warning is emitted, the
# length falls back to the in-memory byte count, the hash is "-", and the file
# is neither kept nor announced.  On success the path is printed on stdout when
# $collect_payload is set; otherwise the file is removed again.
sub _store_payload
{
    use bytes;

    if (! (defined($payload) && length($payload) > 0))
    {
        $payload_length = 0;
        $payload_hash = "-";
        $payload_magic_type = "null";
        return;
    }

    $payload_filename = $payload_root_dir."/".$transaction_id."-payload";

    # Defaults used when the payload cannot be written to disk.
    $payload_length = length($payload);
    $payload_hash = "-";

    my $out_fh;
    if (! open($out_fh, ">", $payload_filename))
    {
        warn("couldn't open payload file: ".$payload_filename);
        return;
    }
    binmode($out_fh, ":raw");

    # Buffered write errors may only surface at close(), so check both.
    my $print_ok = print {$out_fh} $payload;
    my $close_ok = close($out_fh);
    if (! ($print_ok && $close_ok))
    {
        warn("couldn't write payload to file: ".$payload_filename);
        unlink($payload_filename);
        return;
    }

    $payload_length = -s $payload_filename;

    if (open(my $in_fh, "<", $payload_filename))
    {
        binmode($in_fh);
        $payload_hash = Digest::MD5->new->addfile($in_fh)->hexdigest;
        close($in_fh);
    }
    else
    {
        warn("couldn't open payload for hashing: ".$payload_filename);
    }

    if ($collect_payload)
    {
        print($payload_filename."\n");
    }
    else
    {
        unlink($payload_filename);
    }

    return;
}









