File: //opt/hgmods/modsec_block.pl
#!/usr/bin/perl
use Time::Local 'timelocal_nocheck';
use Fcntl qw(:flock SEEK_END);
use Sys::Hostname;
use IO::Socket::INET;
use Data::Dumper;
use JSON;
## TODO: flesh out the mtype handling and possibly add a file to set mtype by rule
## for now mtypes are:
## 1 = general bad rule (injection attempts, etc.)
## 2 = crawler type rules (mj12bot, googlebot/etc. limiters, etc.)
## 3 = post logging rules (wp-login/comments/administrator/etc.) - this may be split up in the future
## 4 = general error logging (things that don't really have an ID, etc.)
# ModSecurity rule IDs that get forwarded. The main loop skips any parsed
# record whose _id is not a key of this hash; only key existence is tested.
my %checked;
$checked{$_} = 1 for qw(
    345114
    345115
    345117
    900165
    900291
    900402
    900405
    900406
    900407
    900408
    900409
    900911
    900912
    900923
    900934
    900935
    900936
    9009999
);
# Maximum distance, in KiB, to rewind from the end of the error log when there
# is no usable saved offset (the byte count is this value * 1024).
my $MAX_BACKTRACK = 100;
# Holds the byte offset reached by the previous run.
my $STATUS_FILE = '/opt/eig_linux/var/modsec_error_log_status';
# Lock file that prevents overlapping runs.
my $LOCK_FILE = '/opt/eig_linux/var/modsec_block_run_lock';
# Log file that is scanned for ModSecurity / mod_rbld lines.
my $ERROR_LOG = '/usr/local/apache/logs/error_log';

# Single-instance guard: exit 42 if the lock file cannot be opened, exit 43 if
# another process already holds the (non-blocking, exclusive) lock.
my $f_l;
unless (open($f_l, '>', $LOCK_FILE)) {
    exit(42);
}
unless (flock($f_l, LOCK_EX | LOCK_NB)) {
    exit(43);
}

# random sleep since all servers are sending these batches every 5 min, avoid slamming glog
sleep(int(rand(15)));

# Scratch cache used by log2time: [0] key of the cached hour, [1] epoch of the
# start of that hour, [2] last timestamp returned.
our @TIMECACHE = (0, 0);
our $GL_SOCKET;              # used by send_to_graylog to fake object persistence
our $HOSTNAME = hostname();
my $fh;
# %USERDOMAINS: left-hand field of /etc/userdomains => right-hand field.
#               The main loop looks it up by request hostname and uses the
#               value as the account user.
# %USEROWNERS:  user => owner; every user seen in /etc/userdomains starts out
#               as its own owner.
my %USERDOMAINS = ();
my %USEROWNERS = ();
# A missing or unreadable file simply leaves the maps as they are.
if (open($fh, '<', '/etc/userdomains')) {
    while (my $entry = <$fh>) {
        chomp($entry);
        next unless $entry =~ /^(\S\S+): (\S+)$/;
        my ($domain, $user) = ($1, $2);
        $USERDOMAINS{$domain} = $user;
        $USEROWNERS{$user} = $user;
    }
    close($fh);
}
# for now to save time on shared, should extend at some point
unless ($HOSTNAME =~ /\.hostgator\.com/) {
    # Override the default owner with the one from /etc/trueuserowners,
    # ignoring entries owned by 'root' or '*'.
    if (open($fh, '<', '/etc/trueuserowners')) {
        while (my $entry = <$fh>) {
            chomp($entry);
            if ($entry =~ /^(\w+): (\w+)$/ && $2 ne 'root' && $2 ne '*') {
                $USEROWNERS{$1} = $2;
            }
        }
        close($fh);
    }
}
# Work out where in the error log to start reading.
# $offsetbytes is the byte position saved by the previous run; it stays 0 when
# the status file is missing, unreadable, empty or not a plain integer.
my $offsetbytes = 0;
if (-e $STATUS_FILE && open($fh, '<', $STATUS_FILE)) {
    my $saved = <$fh>;
    close($fh);
    if (defined $saved) {
        chomp($saved);
        $offsetbytes = $saved if ($saved =~ /^[0-9]+$/);
    }
}
# Reuse the existing $fh lexical instead of re-declaring it in the same scope.
open($fh, '<', $ERROR_LOG) or die('could not open error log');
# Size is taken from the open handle, not the path: if the log is rotated
# between open() and stat(), the path would describe a different file than the
# one we are about to read.
my $logsize = (stat($fh))[7];
if ($logsize < $offsetbytes) {
    # Log is shorter than where we stopped last time (rotated/truncated).
    $offsetbytes = 0;
}
if ($offsetbytes > 0) {
    # Resume exactly where the previous run stopped.
    seek($fh, $offsetbytes, 0);
} elsif ($logsize > ($MAX_BACKTRACK + 3) * 1024) { # if the log is just a little too long, just get it all
    # No usable start point: only look at the last $MAX_BACKTRACK KiB.
    $offsetbytes = $logsize - $MAX_BACKTRACK*1024;
    seek($fh, $offsetbytes, 0);
}
# Scan the error log from the current position, forwarding ModSecurity hits
# for rule IDs listed in %checked, and every parseable mod_rbld listing.
# $readbytes counts the bytes consumed so the next run can resume after them.
my $readbytes = 0;
my $temphost;
while (<$fh>) {
    # Only consume complete lines. A line without a trailing newline is still
    # being written; leave it (and its bytes) for the next run instead of
    # parsing half of it now and the other half later.
    last unless /\n\z/;
    $readbytes += length($_);
    if (/ModSecurity:/) {
        chomp;
        my $record = parse_modsec_line($_);
        if ($record) {
            next unless exists $checked{$record->{'_id'}};
            # Map the request's hostname (without a leading "www.") to an
            # account; fall back to a /~user/ style URI.
            $temphost = $record->{'_hostname'};
            $temphost =~ s/^www\.//;
            if ($USERDOMAINS{$temphost}) {
                $record->{'_user_host_id'} = $USERDOMAINS{$temphost} . '-' . $HOSTNAME;
                $record->{'_owner_host_id'} = $USEROWNERS{$USERDOMAINS{$temphost}} . '-' . $HOSTNAME;
            } elsif ($record->{'_uri'} =~ /^\/+~([a-z0-9]+)\// && $USEROWNERS{$1}) {
                $record->{'_user_host_id'} = $1 . '-' . $HOSTNAME;
                $record->{'_owner_host_id'} = $USEROWNERS{$1} . '-' . $HOSTNAME;
            }
            # First three octets of the client address.
            if ($record->{'_remote_addr'} =~ /^([0-9]+\.[0-9]+\.[0-9]+)\.[0-9]+$/) {
                $record->{'_remote_24'} = $1;
            }
            # Classify the record (see the mtype list at the top of the file).
            if ($record->{'_msg'} =~ /logging/) {
                $record->{'_mtype'} = 3;
            } elsif ($record->{'_msg'} =~ /bot|crawler/i) { # might need to enhance
                $record->{'_mtype'} = 2;
            } elsif (!$record->{'_id'} || !$record->{'_remote_addr'}) {
                $record->{'_mtype'} = 4;
            } else {
                $record->{'_mtype'} = 1;
            }
            send_to_graylog($record);
        }
    } elsif (/mod_rbld.c/) {
        # Lexical here too; previously this assigned to an undeclared
        # package global.
        my $record = parse_rbld_line($_);
        send_to_graylog($record) if ($record->{'_bl'});
    }
}
close($fh);
#print "$$: got offsetbytes $offsetbytes logsize $logsize readbytes $readbytes\n";
# Persist the position reached in the error log so the next run resumes there.
# If the status file cannot be opened the position is simply not saved.
$offsetbytes += $readbytes;
if (open($fh, '>', $STATUS_FILE)) {
    print {$fh} $offsetbytes;
    close($fh);
}
sleep(1);
# Closing the lock handle releases the run lock.
close($f_l);
# parse_modsec_line($line)
# Parse one Apache error-log line carrying a "ModSecurity:" message.
#
# Returns 0 when the line does not have the expected prefix; otherwise a
# hashref with:
#   timestamp      epoch seconds (via log2time)
#   _remote_addr   client address from the "[client ...]" field
#   short_message  the full text after "ModSecurity: "
#   deny_code      status from "Access denied with code N", when present
#   _description   the free text preceding the first "[name" tag
#   _<name>        one entry per '[name "value"]' tag, surrounding quotes removed
# Multi-word values are re-joined with single spaces.
#
# A double quote only closes a quoted section / tag value when it is not
# backslash-escaped, i.e. when it is preceded by an even number of
# backslashes. The earlier pattern used a lookahead, (?!\\)\", which is always
# true in front of a quote, so escaped quotes were treated as closing quotes.
sub parse_modsec_line {
    my $line = shift;
    # (month,day,time,year,loglevel,clientip,rest of modsec message to be parsed)
    my @lineparts = $line =~ m/^\[[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9][0-9]) ([0-9][0-9]:[0-9][0-9]:[0-9][0-9])\S* ([0-9][0-9][0-9][0-9])\] \[:?([a-z:0-9]+)\] (?:\[pid \S+ \d+\] )?\[client ([0-9.]+).*?ModSecurity: (.+)$/;
    return 0 if (scalar @lineparts != 7);
    my @toks = split(/ /, $lineparts[6]);
    my %tokout;
    my $quoted = 0;   # inside a "..." section of the leading description
    my $tkcount = 0;  # number of [name tags seen so far
    my $tkname = '_description'; # most are like [(tokname), but first one before [ starts as this
    if ($lineparts[6] =~ /Access denied with code ([0-9]+)[^0-9]/) {
        @{$tokout{'deny_code'}} = ($1);
    }
    @{$tokout{'timestamp'}} = (log2time($lineparts[0],$lineparts[1],$lineparts[3],$lineparts[2]));
    @{$tokout{'_remote_addr'}} = ($lineparts[5]);
    @{$tokout{'short_message'}} = ($lineparts[6]);
    foreach my $tok (@toks) {
        if (!$quoted && $tok =~ /^\[(\w+)$/) {
            # Start of a new '[name "value"]' tag.
            $tkname = '_'.$1;
            @{$tokout{$tkname}} = ();
            $tkcount++;
        } elsif ($tok =~ /^\"/) {
            if (!$tkcount) {
                # Description word that opens a quote; stay "quoted" unless
                # the same word also closes it with an unescaped quote.
                $quoted ^= 1 if ($tok !~ /(?<!\\)(?:\\\\)*\"$/);
            } else {
                # First word of a tag value: drop the opening quote and, when
                # present, the unescaped closing quote plus bracket. $1 keeps
                # any escaped-backslash pairs that sit right before the quote.
                $tok =~ s/^\"//;
                $tok =~ s/((?<!\\)(?:\\\\)*)\"\]$/$1/;
            }
            push @{$tokout{$tkname}}, $tok;
        } elsif (!$tkcount && $quoted && $tok =~ /(?<!\\)(?:\\\\)*\"$/) {
            # Description word that closes the open quote.
            $quoted = 0;
            push @{$tokout{$tkname}}, $tok;
        } else {
            # Any other word; inside a tag value strip an unescaped closing "].
            $tok =~ s/((?<!\\)(?:\\\\)*)\"\]$/$1/ if ($tkcount);
            push @{$tokout{$tkname}}, $tok;
        }
    }
    return { map { $_, join(' ', @{$tokout{$_}}) } keys %tokout };
}
# this is a much simpler thing than modsec, but keeping it a function since we may need to expand when using on provo brands
#
# parse_rbld_line($line)
# Parse a "[mod_rbld.c] <ip> is listed in <list>" error-log line.
# Always returns a hashref. For a matching line it holds timestamp (epoch via
# log2time), short_message, _remote_addr, _bl (the list name) and _facility.
# For a non-matching line the hashref is empty, so callers testing `_bl` skip
# it; previously such lines still called log2time with undefined arguments.
sub parse_rbld_line {
    my $line = shift;
    chomp($line);
    # (month,day,time,year,listed ip,blacklist name)
    my @lineparts = $line =~ m/^\[[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9][0-9]) ([0-9][0-9]:[0-9][0-9]:[0-9][0-9])\S* ([0-9][0-9][0-9][0-9])\].* \[mod_rbld.c\] ([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) is listed in (\w+)$/;
    return {} if (scalar @lineparts != 6);
    my ($month, $day, $time, $year, $addr, $list) = @lineparts;
    my $tokout = {};
    $tokout->{'timestamp'} = log2time($month, $day, $year, $time);
    $tokout->{'short_message'} = $addr . ' is listed in ' . $list;
    $tokout->{'_remote_addr'} = $addr;
    $tokout->{'_bl'} = $list;
    $tokout->{'_facility'} = 'mod_rbld';
    return $tokout;
}
# log2time(month,day,year,time)
# Convert error-log date parts to epoch seconds in the local timezone.
#   month  three-letter English abbreviation ('Jan' .. 'Dec')
#   day    day of month (e.g. '04')
#   year   four-digit year
#   time   'HH:MM:SS'
#
# @TIMECACHE avoids a timelocal call for consecutive lines in the same hour:
#   [0] key of the cached hour, [1] epoch of the start of that hour,
#   [2] last timestamp returned.
# The cached hour is only reused when the result does not go backwards and is
# less than an hour past the previous result. The key includes the year;
# without it, the same month/day/hour of a different year could be answered
# from the cache with the wrong year.
sub log2time {
    my ($mon, $mday, $year, $hms) = @_;
    my @time = split(/:/, $hms);
    my $hourkey = "$year $mon $mday $time[0]";
    if ($TIMECACHE[0] eq $hourkey) {
        my $cached = $TIMECACHE[1] + 60*$time[1] + $time[2];
        if ($cached < $TIMECACHE[2] + 3600 && $cached >= $TIMECACHE[2]) {
            $TIMECACHE[2] = $cached;
            return $cached;
        }
    }
    my %months = ('Jan'=>0,'Feb'=>1,'Mar'=>2,'Apr'=>3,'May'=>4,
                  'Jun'=>5,'Jul'=>6,'Aug'=>7,'Sep'=>8,'Oct'=>9,
                  'Nov'=>10,'Dec'=>11);
    # Epoch of the start of the hour; minutes and seconds are added afterwards
    # so the hour value can be cached.
    my $now = timelocal_nocheck(0, 0, int($time[0]), int($mday), $months{$mon}, $year - 1900);
    $TIMECACHE[0] = $hourkey;
    $TIMECACHE[1] = $now;
    $now = $now + $time[1]*60 + $time[2];
    $TIMECACHE[2] = $now;
    return $now;
}
# send_to_graylog($msg)
# Send one record (hashref) to glog.eigbox.com:12201 over UDP as a single JSON
# document followed by a newline.
# The hashref is modified in place: 'host' and 'version' are always set, and
# '_facility' defaults to 'modsecurity' when the caller did not set it.
# The socket is kept in $GL_SOCKET and reused across calls; it is (re)created
# when missing or no longer connected. Dies if the socket cannot be created.
# Returns whatever the socket's send() returns; the result is not checked.
sub send_to_graylog {
    my $msg = shift;
    $msg->{'host'} = $HOSTNAME;
    $msg->{'version'} = '1.1';
    $msg->{'_facility'} = 'modsecurity' unless($msg->{'_facility'});
    my $json = encode_json($msg) . "\n";
    if (!$GL_SOCKET || !$GL_SOCKET->connected) {
        # Explicit Class->new(...) rather than indirect object syntax
        # ("new Class (...)"), whose parsing depends on what else is in scope.
        $GL_SOCKET = IO::Socket::INET->new(
            PeerAddr => 'glog.eigbox.com',
            PeerPort => 12201,
            Proto    => 'udp',
        ) or die("Cannot create socket!");
    }
    $GL_SOCKET->send($json);
}