#!/usr/local/bin/perl
# gla, boone, 06/18/92
# Gopher log analyzer
# Copyright (C) 1992, Michigan State University Board of Trustees
#
# Version 1.1
#
# Mail to: Dennis Boone
#
# Modifications:
# 06/18/92 Boone Initial coding
# 11/13/92 Boone Added percentages
# End Modifications
#
# Description:
# gla reads a gopher log file and extracts statistics: times a file was
# referenced and times any given domain came calling.  Host names and
# IP addresses are relieved of the most specific portion to ensure privacy.
# There are five sections in the gla report: Hostnames in alpha order,
# hostnames in order of frequency, filenames in alpha order, filenames in
# order of frequency, and overall statistics.  Each section can be completely
# eliminated with a command line option.  In addition, the number of hosts or
# files reported can be limited using a command line option, for example to
# generate a "most popular file" report.
#
# The gopher log is structured as follows:
#
#     date/time [port] hostname : action
#
# date/time is a standard unix date format: ddd mmm dd hh:mm:ss yyyy
# port      is the client port number; it isn't present in the log files
#           written by all versions of the gopher server
# hostname  is the fqdn or IP address of the calling host
# action    is one of the following:
#               retrieved file path
#               retrieved directory path
#               Root Connection
#
# In addition, older versions of gopherd logged a start time in the
# log file, which is filtered.
#
# Usage: gla [options] [logfile ...]
#
# The log is read from the named files, or from standard input when no
# file is named.  The report is written to standard output; progress and
# error messages go to standard error.
#
# options:
#     -ha      skip host-alpha section of report
#     -hf      skip host-freq section of report
#     -fa      skip file-alpha section of report
#     -ff      skip file-freq section of report
#     -sum     skip summary section of report
#     -hlim n  limit host sections of report to n hosts
#     -flim n  limit file sections of report to n files
#
# End Description

use strict;
use warnings;

push(@INC, "$ENV{'HOME'}/bin") if defined $ENV{'HOME'};

#
# Calendar tables used by epoch()
#
my %mname = (
    "Jan" => 0, "Feb" => 1, "Mar" => 2, "Apr" => 3,
    "May" => 4, "Jun" => 5, "Jul" => 6, "Aug" => 7,
    "Sep" => 8, "Oct" => 9, "Nov" => 10, "Dec" => 11,
);
my @days_in_month = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);

#
# Tallies built by tally() and consumed by report()
#
my %host;                       # calls per (anonymized) client domain
my %file;                       # calls per retrieved item
my $firstline = 1;              # Keep first date stamp in file
my $skipcnt   = 0;              # Log lines that carried no host field

#
# Report options; see the usage text at the top of the file
#
my $haflag  = 1;                # Want host-alpha report?
my $hfflag  = 1;                # Want host-freq report?
my $faflag  = 1;                # Want file-alpha report?
my $ffflag  = 1;                # Want file-freq report?
my $sumflag = 1;                # Want summary report?
my $hlim    = 0;                # Unlimited number of hosts in report
my $flim    = 0;                # Unlimited number of files in report

my %skip_flag = (
    "-ha"  => \$haflag,
    "-hf"  => \$hfflag,
    "-fa"  => \$faflag,
    "-ff"  => \$ffflag,
    "-sum" => \$sumflag,
);
my %limit_opt = (
    "-hlim" => \$hlim,
    "-flim" => \$flim,
);

#
# Variables referenced by the format pictures at the end of the file.
# They are package variables so the formats can always see them.
#
our ($host, $file, $calls, $pct, $cum);
our ($firstdate, $lastdate) = ("", "");
our ($callcnt, $callspace, $hostcnt, $filecnt) = (0, 0, 0, 0);

#
# Is it leap year?
# Takes a four digit year; returns 1 for a leap year, 0 otherwise.
#
sub isleap
{
    my ($year) = @_;

    return 0 if $year % 4;
    return 0 if !($year % 100) && ($year % 400);
    return 1;
}

#
# Compute seconds since Jan 1, 1970 without going south like timelocal()
#
# Takes a date stamp of the form "ddd mmm dd hh:mm:ss yyyy" and returns the
# number of seconds between Jan 1 00:00:00 1970 and that stamp.  No time
# zone adjustment is made; the result is only used to take differences.
# Returns nothing (undef in scalar context) when the stamp cannot be parsed.
#
sub epoch
{
    my ($tstr) = @_;

    return unless defined $tstr;

    my ($mon, $mday, $hour, $min, $sec, $year) =
        $tstr =~ /^\S+\s+(\w{3})\s+(\d{1,2})\s+(\d{1,2}):(\d{2}):(\d{2})\s+(\d{4})/
        or return;
    return unless exists $mname{$mon};

    my $days = 0;
    foreach my $y (1970 .. $year - 1)
    {
        $days += isleap($y) ? 366 : 365;
    }
    foreach my $m (0 .. $mname{$mon} - 1)
    {
        $days += $days_in_month[$m];
        $days++ if $m == 1 && isleap($year);    # Feb 29
    }
    $days += $mday - 1;

    return (($days * 24 + $hour) * 60 + $min) * 60 + $sec;
}

#
# Comparison routines for sorting on the value of an entry in an
# associative array.  Ties are broken by name so the order is repeatable.
#
sub hostbyval           # Sort on count of calls from this host
{
    $host{$b} <=> $host{$a} || $a cmp $b;
}

sub filebyval           # Sort on count of calls for this file
{
    $file{$b} <=> $file{$a} || $a cmp $b;
}

#
# Clip name, leaving domain
# Or keep first three octets of IP address
#
# Takes a host name or dotted-quad address and returns it lowercased with
# the most specific part removed: "a.b.edu" gives "b.edu" and "1.2.3.4"
# gives "1.2.3." (trailing dot kept).  A name without any dot has nothing
# to clip and is returned whole.
#
sub chophost
{
    my ($h) = @_;

    return "" unless defined $h;
    $h = lc $h;                                 # Force lowercase

    # Test the host itself, not the current log line in $_.
    return $1 if $h =~ /^([0-9]+\.[0-9]+\.[0-9]+\.)[0-9]+$/;
    return $1 if $h =~ /[^\s.]+\.([^\s]+)/;
    return $h;
}

#
# Increment appropriate counters
#
# Takes one log line (defaults to $_ when called without arguments).
# Returns 1 when the line was counted, 0 when it had no " host :" field
# and was skipped.  A line whose action is neither "retrieved ..." nor
# "Root Connection" is counted under "(unrecognized action)" rather than
# being charged to whatever file the previous line named.
#
sub tally
{
    my $line = @_ ? $_[0] : $_;

    # Extract host name from log line
    unless (defined $line && $line =~ / ([^\s]+) :/)
    {
        $skipcnt++;
        return 0;
    }
    my $th = chophost($1);      # Remove incriminating information

    my $tf = "(unrecognized action)";
    $tf = $1 if $line =~ /retrieved [^\s]+ (.*)$/;
    $tf = "Root Connection" if $line =~ /Root Connection/;

    $host{$th}++;
    $file{$tf}++;
    $callcnt++;
    if (! ($callcnt % 500))
    {
        print STDERR "$callcnt records processed.\r";
    }

    my $stamp = substr($line, 0, 24);
    if ($firstline)
    {
        $firstline = 0;
        $firstdate = $stamp;
    }
    $lastdate = $stamp;
    return 1;
}

#
# Format a count as a percentage of all calls ("%5.1f").
#
sub percent
{
    my ($count) = @_;

    return sprintf("%5.1f", $callcnt ? ($count / $callcnt) * 100 : 0);
}

#
# Write one host or file section of the report.
#   $top, $detail  names of the top-of-form and detail formats
#   $counts        reference to the hash of call counts
#   $names         reference to the keys, already in report order
#   $limit         stop after this many entries (0 means no limit)
#   $slot          reference to the format variable that receives the name
#
sub print_section
{
    my ($top, $detail, $counts, $names, $limit, $slot) = @_;

    $^ = $top;                  # Set top-of-form format
    $~ = $detail;               # Set detail format

    my $shown   = 0;
    my $running = 0;
    foreach my $key (@$names)
    {
        $$slot   = $key;
        $calls   = $counts->{$key};
        $running += $calls;
        $pct     = percent($calls);
        $cum     = percent($running);
        write;
        $shown++;
        last if ($limit && ($shown == $limit));
    }
    $- = 0;                     # Force end-of-page
}

#
# Print reports
#
sub report
{
    $filecnt = keys %file;
    $hostcnt = keys %host;

    # Report domain names in alpha order
    print_section("TOPHOSTALPHA", "HOSTALPHA", \%host,
                  [ sort keys %host ], $hlim, \$host) if $haflag;

    # Report domain names in descending order of call frequency
    print_section("TOPHOSTFREQ", "HOSTFREQ", \%host,
                  [ sort hostbyval keys %host ], $hlim, \$host) if $hfflag;

    # Report file names in alpha order
    print_section("TOPFILEALPHA", "FILEALPHA", \%file,
                  [ sort keys %file ], $flim, \$file) if $faflag;

    # Report file names in descending order of frequency of use
    print_section("TOPFILEFREQ", "FILEFREQ", \%file,
                  [ sort filebyval keys %file ], $flim, \$file) if $ffflag;

    if ($sumflag)
    {
        # Report cheesy counters
        $^ = "TOPSUMMARY";
        $~ = "SUMMARY";
        write;
    }
}

#
# Consume leading options from @ARGV.  Parsing stops at the first argument
# that is not an option, so log file names are left in @ARGV for <>.
# An unknown option or a bad limit value is reported and ends the run.
#
sub parse_args
{
    while (@ARGV && $ARGV[0] =~ /^-./)
    {
        my $arg = shift @ARGV;

        if (exists $skip_flag{$arg})
        {
            ${ $skip_flag{$arg} } = 0;
            next;
        }
        if (exists $limit_opt{$arg})
        {
            my $n = shift @ARGV;
            unless (defined $n && $n =~ /^[0-9]+$/)
            {
                print STDERR "$0: $arg requires a non-negative integer\n";
                exit(1);
            }
            ${ $limit_opt{$arg} } = $n;
            next;
        }
        print STDERR "$0: unrecognized argument: $arg\n";
        exit(1);
    }
}

###############################################################################

sub main
{
    $= = 55;                    # Max lines per page

    parse_args();

    while (my $line = <>)       # Gobble the whole log file
    {
        next if $line =~ /^$/;                          # Throw away blank lines
        next if $line =~ /Starting gopher daemon/;      # Throw away start lines
        tally($line);                                   # Keep count
    }
    print STDERR "$callcnt records processed.\n";
    print STDERR "$skipcnt lines without a host field skipped.\n" if $skipcnt;

    # Average spacing is only meaningful when there were calls and both
    # date stamps could be parsed; otherwise report zero.
    my $start = epoch($firstdate);
    my $end   = epoch($lastdate);
    $callspace = ($callcnt && defined $start && defined $end)
               ? ($end - $start) / $callcnt
               : 0;

    report();                   # Print reports
}

# Run only when executed as a program, so the routines above can be
# loaded by other code without reading any input.
main() unless caller;

###############################################################################
#
# Report formats.  Heading text is kept as it was; the spacing between
# columns is laid out to line up with the detail pictures.
#

format TOPHOSTALPHA =
Michigan State University Gopher Log Analyzer             Page: @<<
$%

Client Domains by Name

Domain Domain or IP Address                             Calls     %
----------------------------------------------------- ------- -----
.

format HOSTALPHA =
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @>>>>>> @>>>>
$host, $calls, $pct
.

format TOPHOSTFREQ =
Michigan State University Gopher Log Analyzer             Page: @<<
$%

Client Domains by Frequency of Use

Host Domain or IP Address                               Calls     %   Cum
----------------------------------------------------- ------- ----- -----
.

format HOSTFREQ =
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @>>>>>> @>>>> @>>>>
$host, $calls, $pct, $cum
.

format TOPFILEALPHA =
Michigan State University Gopher Log Analyzer             Page: @<<
$%

Retrieved Items by Name

File Description                                         Uses     %
----------------------------------------------------- ------- -----
.

format FILEALPHA =
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @>>>>>> @>>>>
$file, $calls, $pct
.

format TOPFILEFREQ =
Michigan State University Gopher Log Analyzer             Page: @<<
$%

Retrieved Items by Frequency of Use

File Description                                         Uses     %   Cum
----------------------------------------------------- ------- ----- -----
.

format FILEFREQ =
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @>>>>>> @>>>> @>>>>
$file, $calls, $pct, $cum
.

format TOPSUMMARY =
Michigan State University Gopher Log Analyzer             Page: @<<
$%

Report Summary

.

format SUMMARY =
Starting date:                 @<<<<<<<<<<<<<<<<<<<<<<<<
$firstdate
Ending date:                   @<<<<<<<<<<<<<<<<<<<<<<<<
$lastdate
Total calls:                   @<<<<<<<<<
$callcnt
Average seconds between calls: @<<<<<<
$callspace
Different domains:             @<<<<<<<<<
$hostcnt
Different files:               @<<<<<<<<<
$filecnt
.