#!/usr/bin/perl

# wrt prime-suspect signals server probe
# Requires user dd to be created and execdir a world writable sub-directory
# due to tshark buffering (even withe use of -l), the HTTP packet processing is always 2 packets behind

# arg0 - sandbox port  set as 5010
# arg1 - input queue directory
# arg2 - prime-suspects directory
# arg3 - start time-stamp used by interval-based logging

use IO::Socket::INET;
use threads;
use URI::Escape;
use Time::HiRes;


if (scalar @ARGV < 4) {die "check usage\n";}

# Ignoring SIGPIPE rather than terminating
$SIG{PIPE} = 'IGNORE';
#Flush all output buffers
$|=1;

#SUB-ROUTINES

#cient request processing thread, get next HTTP packet and enque
sub getpackets {


threads->self->detach();

my $session = $_[0];
my $getpacketsctr = time;

	while($nxtHTTP = <$session>)
	{

	  	$cumuline .= $nxtHTTP;

		#completed packet
                if($cumuline =~ /(<packet>.*?<\/proto>\n<\/packet>)/si)
        	{

                        #instr
                        $instrstartts = Time::HiRes::time;

			$tag = $getpacketsctr; $getpacketsctr++;
			open(FQ,">$ARGV[1]/inputhttp".$tag);
                        $HTTPreqfull = $1;
                        print FQ $HTTPreqfull;
			close(FQ);
                	$cumuline =~ s/.*?<\/proto>\n<\/packet>//si;

                        $HTTPreqfull =~ /field name="timestamp".*value="(.+?)"/i;
                        $instrid = $1;
                        $instrendts = Time::HiRes::time;
                        $interval=(($instrstartts - $ARGV[3])/60)%5;
                        if($interval == 0 || $ARGV[3] == 0)
                        {
                                open(INSTRF, ">>wrtperflogs/wrtpampsinit.log");
                                print INSTRF "\"$instrid\",\"$instrstartts\",\"$instrendts\"\n";
                                close(INSTRF);
                        }

        	}

	}

}

#extract user values from input HTTP request
sub getuservals {

	#clean up HTTPReq
	$HTTPreq =~ s/value="\s*"//g;

	#load all values in array - loading cookie and value lists individually
	$i=0;
	while($HTTPreq =~ / value="(.*?)"/gs)
	{
		$uval = pack("H*",$1);
        	if($uval =~ /^Cookie:(.*?)(\r\n)/si)
		{
        		$cookiestr = $1.$2;
        		while($cookiestr =~ /(\w+?)=(.*?)(;|\r\n)/gsi)
        		{
                		$uservals[$i] = $2;
                		$i++;
        		}
		} elsif($uval =~ /^[\w-]+?:(.*?)(\r\n)/si)
		{
        		$headerstr = $1.$2;
        		while($headerstr =~ /(.*?)(;|\r\n)/gsi)
        		{
                		$uservals[$i] = $1;
                		$i++;
        		}

		} else {

			$uservals[$i] = $uval;
			$i++;
		}
		
	}

	#further clean-up
	@tmpuservals = ();
	foreach $userval (@uservals)
	{
		if ($userval !~/^\s*$/)
		{
			push(@tmpuservals, $userval);
		}
	}
	@uservals = @tmpuservals;


	#Missing values: Whole QueryString, GET/POST values extract + normalize 
	$uservals[2] =~ /.+?\?(.+)/;
	$qs = $1;
	$qs =~ tr/+/ /;
	$qs = uri_unescape($qs);
	push(@uservals, $qs);

	@tmpuservals = ();
	foreach $userval (@uservals)
	{
		while($userval =~ /(\S+?)=(\S+?)(&|\s|$)/gi)
		{
			$getpostval = $2;
			$getpostval =~ tr/+/ /;
			$getpostval = uri_unescape($getpostval);
			push(@tmpuservals, $getpostval)
		}
	}
	push(@uservals, @tmpuservals);


	#Debug dump - to comment out whole block
	#$j=0;
	#foreach $userval (@uservals)
	#{
		#print STDOUT "userval$j-$userval\n";
		#$j++;
	#}

}

#function testing for machine code and possibly return a prime-suspect id string
sub execmachine {

	$string = $_[0];

	#Efficiency gain based on realstic size of machine shell code
	if(length($string) < 64)
	{
		return;
	}

        #Efficiency gain based on disruptive nulls
        $nulltest = substr($string,0,64);
        if($nulltest =~ /\0[!\0]/si)
        {
                return;
        }

	
	#test string
	my $buf =
	"6a0b58995266682d6389e7682f73" .
	"6800682f62696e89e352e8110000" .
	"006563686f2048656c6c6f20576f" .
	"726c6400575389e1cd80";

	$rand = int(rand(1)*1000000000);
	
        #Utilizing bash rather than sh for execmachine due to input argument length
	#/bin/sh complains of strings ($hex) being too long
	$pid = open(RC, "| su dd -c \'strace -vfq -oexecdir/strace$rand.tmp /bin/bash\'");
	print RC "cd execdir \n";
	print RC "../runcode\n";
	@hexarray = unpack("H*", $string);
	$hex = join('',@hexarray);
	#print STDOUT "Sending $hex to RC\n"; #Debugging line
	print RC "$hex\n";
	$killed=0;

	# Utilizing a SIGALRM handler for handling long running executable content
	eval {
		local $SIG{ALRM} = sub {die "ALARM\n";};
    		alarm 30; # set to 30s
    		close(RC);
    		alarm 0;
	};

	#SIGALRM Handler - sending a SIGINT to $pid and all its child processes is REVERSE ORDER in order to have strace finish as graceful as possible
	if($@ && $@=~/ALARM\n/) {
		print STDOUT "command timed out.\n"; 
		$killed=1;

		open(PSTREE, "pstree -p $pid |");
		@pstree = <PSTREE>;
		close(PSTREE);

		@pgroup = ();
		foreach $pstreeline (@pstree)
		{
			while($pstreeline =~ /\((\d+)\)/gs)
			{
				push(@pgroup, $1);	
			}
		}
		@pgroup = sort(@pgroup);
		@pgroup = reverse(@pgroup);
		foreach $pgentry (@pgroup) 
		{
			kill 2, $pgentry;
		}

		close(RC);
	}


	#read strace file and return trace if any part of the  shell command returns with success or an error related to directory structure or permissions  or sigint - exit_group(0) and exit_group(1) respectively
	open(STRACE, "<execdir/strace$rand.tmp");
	@traces = <STRACE>;
	close(STRACE);
	unlink("execdir/strace$rand.tmp") or print STDOUT "UNLINK ERROR ------- $!\n";
	$tracestr = join("",@traces);
	$hex4 = substr($hex,0,4);
	if ($tracestr =~ /\n(\d+)\s+read\(0, "$hex4.*?\n\1(?!\s+read\(0, ").*?\n\1(?!\s+read\(0, ")/si || $killed == 1) #regex: check wether after the reading of $hex we have more than one syscall trace - ?! - these are look-ahead expressions
	{
		#chop instrumentation part of trace
		#print STDOUT "+++++++++++++++++++Matched $&++++++++++++++++\n";
		$tracestr =~ s/.*?chdir.*?execve.*?\n//si;
		return $tracestr;	
	} else {
		return ""; 
	}
}

#function testing for perl code and possibly returns a prime-suspect id string
sub execperl {

	$string = $_[0];
	$string =~ s/\r//gs; #clean up disruptive new lines
	$string =~ s/\n//gs; #clean up disruptive new lines

	#test string
	#$buf = "xxx;print STDOUT \"Hello World\n\";";
	#$buf = "adminadmin; elewf; dgregpro; welewpgg;";

	#extract parts of string that could possibly represent an injection
	if($string =~ /;/s)
	{

	$rand = int(rand(1)*1000000000);

	$pid = open(PERL, "| su dd -c  \'strace -vfq -oexecdir/strace$rand.tmp /bin/sh\'");
	print PERL "cd execdir \n";
	print PERL "perl -e \'".$string."\' ";
	$killed=0;

	# Utilizing a SIGALRM handler for handling long running executable content
	eval {
		local $SIG{ALRM} = sub {die "ALARM\n";};
    		alarm 30; # set to 30s
    		close(PERL);
    		alarm 0;
	};

	#SIGALRM Handler - sending a SIGINT to $pid and all its child processes is REVERSE ORDER in order to have strace finish as graceful as possible
	if($@ && $@=~/ALARM\n/) {
		print STDOUT "command timed out.\n"; 
		$killed=1;

		open(PSTREE, "pstree -p $pid |");
		@pstree = <PSTREE>;
		close(PSTREE);

		@pgroup = ();
		foreach $pstreeline (@pstree)
		{
			while($pstreeline =~ /\((\d+)\)/gs)
			{
				push(@pgroup, $1);	
			}
		}
		@pgroup = sort(@pgroup);
		@pgroup = reverse(@pgroup);
		foreach $pgentry (@pgroup) 
		{
			kill 2, $pgentry;
		}

		close(PERL);
	}


	#read strace file and return trace if any part of the  shell command returns with success or an error related to directory structure or permissions  or sigint - exit_group(0) and exit_group(1) respectively
	open(STRACE, "<execdir/strace$rand.tmp");
	@traces = <STRACE>;
	close(STRACE);
	unlink("execdir/strace$rand.tmp") or print STDOUT "UNLINK ERROR ------- $!\n";
	$tracestr = join("",@traces);
	if ($tracestr =~ /exit_group\(0\)|exit_group\(1\)/si || $killed == 1)
	{
		#chop instrumentation part of trace
		$tracestr =~ s/.*?chdir.*?execve.*?\n//si;
		return $tracestr;	
	}

	}
}


#function testing for BASH instructions and possibly returns a prime-suspect id string
sub execbash {


	$string = $_[0];
	$string =~ s/\r//gs; #clean up disruptive new lines
	$string =~ s/\n//gs; #clean up disruptive new lines

	#extract parts of string that could possibly represent an injection
	if($string =~ /[;\|]/s) 
	{

	$rand = int(rand(1)*1000000000);

	$pid = open(BASH, "| su dd -c  \'strace -vfq -oexecdir/strace$rand.tmp /bin/sh\'");
	print BASH "cd execdir \n";
	print BASH "$string\n";
	$killed=0;

	# Utilizing a SIGALRM handler for handling long running executable content
	eval {
		local $SIG{ALRM} = sub {die "ALARM\n";};
    		alarm 30; # set to 30s
    		close(BASH);
    		alarm 0;
	};

	#SIGALRM Handler - sending a SIGINT to $pid and all its child processes is REVERSE ORDER in order to have strace finish as graceful as possible
	if($@ && $@=~/ALARM\n/) {
		print STDOUT "command timed out.\n"; 
		$killed=1;

		open(PSTREE, "pstree -p $pid |");
		@pstree = <PSTREE>;
		close(PSTREE);

		@pgroup = ();
		foreach $pstreeline (@pstree)
		{
			while($pstreeline =~ /\((\d+)\)/gs)
			{
				push(@pgroup, $1);	
			}
		}
		@pgroup = sort(@pgroup);
		@pgroup = reverse(@pgroup);
		foreach $pgentry (@pgroup) 
		{
			kill 2, $pgentry;
		}

		close(BASH);
	}



	#read strace file and return trace if any part of the  shell command returns with success or an error related to directory structure or permissions  or sigint - exit_group(0) and exit_group(1) respectively
	open(STRACE, "<execdir/strace$rand.tmp");
	@traces = <STRACE>;
	close(STRACE);
	unlink("execdir/strace$rand.tmp") or print STDOUT "UNLINK ERROR ------- $!\n";
	$tracestr = join("",@traces);
	if ($tracestr =~ /exit_group\(0\)|exit_group\(1\)/si || $killed == 1)
	{
		#chop instrumentation part of trace
		$tracestr =~ s/.*?chdir.*?execve.*?\n//si;
		return $tracestr;	
	}

	}
}

#function testing for PHP code intended for in-process memory injection and possibly returns a prime-suspect id string
sub execphpmem {


	$string = $_[0];

        #delete parts of strings that are intended for storage-level injection
        $string =~ s/<\?php.*?\?>//gsi;

	#extract parts of string that could possibly represent an injection
	if($string =~ /;/s) 
	{

	$rand = int(rand(1)*1000000000);

	open(PHPFILE, ">execdir/phpfile$rand") or die "Cannot create phpfile$rand\n";
	print PHPFILE "<?php $string ?>";
	close(PHPFILE);
	chmod(0666,"execdir/phpfile$rand");
	$pid = open(PHP, "| su dd -c  \'strace -vfq -oexecdir/strace$rand.tmp /bin/sh\'");
	print PHP "cd execdir \n";
	print PHP "php -f phpfile$rand"; #note: php -r is faulty
	$killed=0;

	# Utilizing a SIGALRM handler for handling long running executable content
	eval {
		local $SIG{ALRM} = sub {die "ALARM\n";};
    		alarm 30; # set to 30s
    		close(PHP);
    		alarm 0;
	};

	#SIGALRM Handler - sending a SIGINT to $pid and all its child processes is REVERSE ORDER in order to have strace finish as graceful as possible
	if($@ && $@=~/ALARM\n/) {
		print STDOUT "command timed out.\n"; 
		$killed=1;

		open(PSTREE, "pstree -p $pid |");
		@pstree = <PSTREE>;
		close(PSTREE);

		@pgroup = ();
		foreach $pstreeline (@pstree)
		{
			while($pstreeline =~ /\((\d+)\)/gs)
			{
				push(@pgroup, $1);	
			}
		}
		@pgroup = sort(@pgroup);
		@pgroup = reverse(@pgroup);
		foreach $pgentry (@pgroup) 
		{
			kill 2, $pgentry;
		}

		close(PHP);
	}


	#read strace file and return trace if any part of the  shell command returns with success - exit_group(0) or executed successfully due to sandbox restrictions
	open(STRACE, "<execdir/strace$rand.tmp");
	@traces = <STRACE>;
	close(STRACE);
	unlink("execdir/strace$rand.tmp") or print STDOUT "UNLINK ERROR ------- $!\n";
	unlink("execdir/phpfile$rand") or print STDOUT "UNLINK ERROR ------- $!\n";
	$tracestr = join("",@traces);
	if ($tracestr =~ /exit_group\(0\)|exit_group\(1\)/si || $killed == 1)
	{
		#chop instrumentation part of trace
		$tracestr =~ s/.*?chdir.*?execve.*?\n//si;
		return $tracestr;	
	}

	}
}




#QUEUE PROCESSING LOOP
sub qloop {

my $getpacketsctr = time;

	while(1)
	{

		#HTTP request string currently being processed
		$HTTPreq = "";

		#uservalues array - filled with user values found in an HTTP request
		@uservals = "";

		#prime-suspect identification string 
		$psid = "";

		# process next packet in queue if any
		@files = <$ARGV[1]/in*>;
		
		foreach $file (@files)
		{
			open(HTTP, "<$file") or print STDERR "File opening error\n";
			@HTTPlines = <HTTP>;
			$HTTPreq = '';
			$HTTPreqfull = join("", @HTTPlines);

                        #instr
                        $HTTPreqfull =~ /field name="timestamp".*value="(.+?)"/i;
                        $instrid = $1;
                        $instrstartts = (stat($file))[9]; # limitation due to the IPC that that would be required, increasing computation time unneccessarily



                        $HTTPreqfull =~ /(<proto name="http".*$)/si;
                        $HTTPreq = $1;
			close(HTTP);
			$newfile = $file;
                        $newfile =~ s/input//i;
                        rename("$file","$newfile");
			unlink("$newfile");
			print STDOUT ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Handling $file\n";

			# process HTTP, url encoding normalize,  and fill uservalues array
			@uservals = ();
			&getuservals();
		

			#test each uservalue for executable strings for each executable evironment
			$i = 0;
			@trace1 = ""; 
			@trace2 = ""; 
			@trace3 = ""; 
			@trace4 = ""; 

			foreach $userval (@uservals)
			{
				$trace1[$i] = &execmachine($userval);
				$trace2[$i] = &execperl($userval);
				$trace3[$i] = &execbash($userval);
				$trace4[$i] = &execphpmem($userval);
				$i++;
			}

			#generate aggregated prime-suspect identification
			$psid = join("",@trace1).join("",@trace2).join("",@trace3).join("",@trace4);

			#If a prime-suspect, place HTTP request in prime-suspects directory along with aggregated psid
			if ($psid ne "")
			{
				$tag = $getpacketsctr; $getpacketsctr++;
				open(PRIME,">$ARGV[2]/inputprimehttp".$tag);
				print PRIME $HTTPreqfull;
				print PRIME "\n\n==========================================\n\n";
				print PRIME $psid;
				close(PRIME);
			}

                        #instr
                        $instrendts = Time::HiRes::time;
                        $interval=(($instrstartts - $ARGV[3])/60)%5;
                        if($interval == 0 || $ARGV[3] == 0)
                        {
                                open(INSTRF, ">>wrtperflogs/wrtpamps.log");
                                print INSTRF "\"$instrid\",\"$instrstartts\",\"$instrendts\"\n";
                                close(INSTRF);
                        }



		}

		sleep 1;

	}

}


#Luanch q process
$pid = fork();
if( $pid == 0 ){
   &qloop;
}


#Launch client server processing

my $listen_socket = IO::Socket::INET->new(LocalPort => $ARGV[0],
					Listen => 1,
					Proto => 'tcp',
					Reuse => 1) or die "Server Error\n";

print STDERR "wrtpamps listening on port $ARGV[0]\n";

while(my $conn = $listen_socket->accept())
{
	$csthread = threads->new(\&getpackets, $conn);
}






