-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsummary.pl
145 lines (120 loc) · 3.33 KB
/
summary.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/perl
=pod
=head1 NAME
summary.pl - Print a summary of the apache logs
=head1 SYNOPSIS
summary.pl <file> [<file> ...]
=head1 DESCRIPTION
The I<summary.pl> script reads Apache access logs and prints a summary
of the information in them.
=head1 EXAMPLES
summary.pl /var/log/httpd/access*
Top hosts accessing the system
Hits Who
4458 207.46.98.144
1745 64.242.88.50
1648 128.194.135.83
1362 210.173.179.39
1318 200.55.147.70
1054 209.218.171.51
794 193.22.65.1
725 207.68.146.56
688 65.214.44.161
672 210.173.179.68
Top URLs accessed
Hits What
23276 /vim-cook.html
9170 /robots.txt
7579 /style/index.html
6036 /
5815 /style/
3599 /style/styleTOC.pdf
3335 /style/c01.pdf
3045 /sw/index.html
2774 /style/c02.pdf
2759 /style/c07.pdf
=head1 AUTHOR
Steve Oualline, E<lt>[email protected]E<gt>.
=head1 COPYRIGHT
Copyright 2005 Steve Oualline.
This program is distributed under the GPL.
=cut
#
# summary.pl -- Print a summary of the apache logs.
#
# Summary includes top urls accessed
# and the top people who accessed the site (by access)
#
# Usage:
# summary.pl <access_log> [<access_log> ...]
#
use strict;
use warnings;
my %access_count;   # Key => accessing host, value => number of OK hits
my %page_count;     # Key => requested URL,  value => number of OK hits

# Read every line from the files named on the command line (or from
# standard input when none are given) and tally the successful requests,
# once per accessing host and once per requested URL.
while (my $line = <>) {
    # Pick the three interesting fields out of the log line.
    # Lines that do not match this shape are skipped.
    next unless $line =~ m{
        (\S+)       # capture 1: first run of non-space characters, the host
        [^"]*       # anything up to the opening double quote
        "([^"]*)"   # capture 2: the text between the double quotes
        \s*         # optional spaces
        (\d+)       # capture 3: one or more digits, the status code
    }x;
    my ($host, $request, $status) = ($1, $2, $3);

    # Only requests answered with status 200 count toward the summary.
    next if $status != 200;

    $access_count{$host}++;

    # The quoted request is split on whitespace and its second word is
    # taken as the URL. A request with fewer than two words (for example
    # a lone "-") has no URL; tallying it would warn about an undefined
    # hash key and add a bogus empty entry to the report, so it is left
    # out of the page counts.
    my @request_words = split /\s+/, $request;
    my $url = $request_words[1];
    next if not defined $url;

    $page_count{$url}++;
}
# Report the ten hosts with the most counted hits, busiest first.
my @access_array;   # One { host, count } record per accessing host

# Turn the access hash into a list of records that can be sorted
foreach my $host (keys %access_count) {
    push @access_array, {
        host  => $host,
        count => $access_count{$host},
    };
}

# Highest count first. Hosts with the same count are ordered by name so
# that the report does not depend on the order in which keys() happened
# to return them, which can differ from one run to the next.
my @access_top = sort {
    $b->{count} <=> $a->{count}
        or $a->{host} cmp $b->{host}
} @access_array;

print "Top hosts accessing the system\n";
print "Hits Who\n";
for my $rank (0 .. 9) {
    my $entry = $access_top[$rank];
    last if not defined $entry;    # Fewer than ten hosts were seen
    print "$entry->{count}\t", "$entry->{host}\n";
}
#----------------------------------------------------------
# Report the ten URLs with the most counted hits, busiest first.
my @page_array;     # One { url, count } record per requested URL

# Turn the page hash into a list of records that can be sorted
foreach my $url (keys %page_count) {
    push @page_array, {
        url   => $url,
        count => $page_count{$url},
    };
}

# Highest count first. URLs with the same count are ordered by name so
# that the report does not depend on the order in which keys() happened
# to return them, which can differ from one run to the next.
my @page_top = sort {
    $b->{count} <=> $a->{count}
        or $a->{url} cmp $b->{url}
} @page_array;

print "Top URLs accessed\n";
print "Hits What\n";
for my $rank (0 .. 9) {
    my $entry = $page_top[$rank];
    last if not defined $entry;    # Fewer than ten URLs were seen
    print "$entry->{count}\t$entry->{url}\n";
}