#!/usr/bin/perl
use warnings;
use strict;
=head1 NAME
spread_stats.pl - A tool to examine NFL point spreads.
=head1 SYNOPSIS
spread_stats.pl [options] [year] [config_file]
Options:
--help brief help message
--man full documentation
--range allows you to enter a range (say 5-9)
of weeks to analyze. The range is a pair
of numbers separated by a '-'.
=head1 VERSION
author dwmyers
date 10/17/2007
modified 11/03/2007
=head1 DESCRIPTION
spread_stats.pl - A tool to examine NFL point spreads.
This program takes data from a MySQL database and does analysis of team
strength based on record and point spread.
=head1 COPYRIGHT
This program is copyrighted 2007 by David Myers. All rights are
reserved.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=cut
use DBI;
use Config::Simple;
use Getopt::Long;
use Pod::Usage;
use Statistics::Descriptive;
my $help = 0;
my $man = 0;
my $range = 0;
GetOptions(
'help|?' => \$help,
man => \$man,
"range=s" => \$range,
) or pod2usage(2);
pod2usage( -exitval => 0, -verbose => 1 ) if $help;
pod2usage( -exitval => 0, -verbose => 2 ) if $man;
my $year = shift || 2007;
my $config_file = shift || 'nfl.config';
my %db_hash;
Config::Simple->import_from( $config_file, \%db_hash );
my $db = "nfl_$year";
my $user = $db_hash{"${db}.user"};
my $pass = $db_hash{"${db}.pass"};
my $table = $db_hash{"${db}.table"} || "games";
my %teams = ();
my %stats = (
games_played => 0,
wins => 0,
win_pct => 0.0,
losses => 0,
ties => 0,
points_for => 0,
points_against => 0,
avg_pts_for => 0.0,
avg_pts_against => 0.0,
point_spread => 0,
avg_point_spread => 0.0,
);
my %totals = (
games => 0,
win_score => 0,
losing_score => 0,
home_wins => 0,
visit_wins => 0,
win_margin => 0,
);
my $dbh = DBI->connect( "DBI:mysql:$db", "$user", "$pass" )
or die("Cannot connect to the database");
my $sql = "SELECT visitor, visit_score, home, home_score from $table";
if ($range) {
if ( $range =~ /-/ ) {
my ( $start_week, $end_week ) = split /-/, $range, 2;
if ( $start_week =~ /\d\d?/ and $end_week =~ /\d\d?/ ) {
$start_week = sanitize($start_week);
$end_week = sanitize($end_week);
$sql =
"SELECT visitor, visit_score, "
. "home, home_score from $table "
. "where week >= $start_week and week <= $end_week";
}
}
}
my $sth = $dbh->prepare($sql);
$sth->execute;
while ( my ( $visitor, $visit_score, $home_team, $home_score ) =
$sth->fetchrow_array() )
{
$totals{games}++;
my $diff = $visit_score - $home_score;
$teams{$visitor} = {%stats} unless $teams{$visitor};
$teams{$home_team} = {%stats} unless $teams{$home_team};
$teams{$visitor}{games_played}++;
$teams{$home_team}{games_played}++;
$teams{$visitor}{point_spread} += $diff;
$teams{$home_team}{point_spread} -= $diff;
$teams{$visitor}{points_for} += $visit_score;
$teams{$visitor}{points_against} += $home_score;
$teams{$home_team}{points_for} += $home_score;
$teams{$home_team}{points_against} += $visit_score;
push @{$teams{$visitor}{arr_point_spread}}, $diff;
push @{$teams{$home_team}{arr_point_spread}}, -$diff;
push @{$teams{$visitor}{arr_points_for}}, $visit_score;
push @{$teams{$visitor}{arr_points_against}}, $home_score;
push @{$teams{$home_team}{arr_points_for}}, $home_score;
push @{$teams{$home_team}{arr_points_against}}, $visit_score;
if ( $diff > 0 ) {
$teams{$visitor}{wins}++;
$teams{$home_team}{losses}++;
$totals{win_score} += $visit_score;
$totals{losing_score} += $home_score;
$totals{win_margin} += $diff;
$totals{visit_wins}++;
}
elsif ( $diff == 0 ) {
$teams{$visitor}{ties}++;
$teams{$home_team}{ties}++;
}
else {
$teams{$visitor}{losses}++;
$teams{$home_team}{wins}++;
$totals{win_score} += $home_score;
$totals{losing_score} += $visit_score;
$totals{win_margin} += abs($diff);
$totals{home_wins}++;
}
}
$dbh->disconnect;
print "Global Statistics:\n";
my $avg_win = $totals{win_score} * 1.0 / $totals{games};
my $avg_loss = $totals{losing_score} * 1.0 / $totals{games};
my $avg_margin = $totals{win_margin} * 1.0 / $totals{games};
print "Games Home Wins Winning_Score Losing_Score Margin\n";
printf "%-4d %4d %6.2f %6.2f %6.2f\n", $totals{games},
$totals{home_wins}, $avg_win, $avg_loss, $avg_margin;
for ( sort keys %teams ) {
$teams{$_}{win_pct} =
( 100.0 * $teams{$_}{wins} ) / $teams{$_}{games_played};
$teams{$_}{avg_point_spread} =
( $teams{$_}{point_spread} * 1.0 ) / $teams{$_}{games_played};
$teams{$_}{avg_pts_for} =
( $teams{$_}{points_for} * 1.0 ) / $teams{$_}{games_played};
$teams{$_}{avg_pts_against} =
( $teams{$_}{points_against} * 1.0 ) / $teams{$_}{games_played};
}
my $rank = 0;
print "\n\nRank Team Spread Median StdErr TrMean \n";
print "---------------------------------------------------\n";
for (
sort {
$teams{$b}{win_pct} <=> $teams{$a}{win_pct}
or $teams{$b}{avg_point_spread} <=> $teams{$a}{avg_point_spread}
or $teams{$b}{avg_pts_for} <=> $teams{$a}{avg_pts_for}
or $teams{$a}{avg_pts_against} <=> $teams{$b}{avg_pts_against}
}
keys %teams
)
{
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data(@{$teams{$_}{arr_point_spread}});
my $mean = $stat->mean();
my $median = $stat->median();
my $stderr = $stat->standard_deviation();
my $trim = $stat->trimmed_mean(0.25);
$rank++;
printf "%-2d %-3s %6.2f %5.1f +/-%6.2f %6.2f \n",
$rank, $_, $mean, $median, $stderr, $trim;
}
sub sanitize {
my $num = shift;
$num = 1 if ( $num < 1 );
$num = 21 if ( $num > 21 );
return $num;
}