#!/usr/bin/perl
use warnings;
use strict;

=head1 NAME

 spread_stats.pl  - A tool to examine NFL point spreads. 

=head1 SYNOPSIS

 spread_stats.pl [options] [year] [config_file]
 Options:
        --help		brief help message
        --man		full documentation
	--range		allows you to enter a range (say 5-9) 
			of weeks to analyze. The range is a pair 
			of numbers separated by a '-'.

Notes: Year defaults to 2007.
config_file defaults to 'nfl.config' in the current directory.

=head1 VERSION

 author   dwmyers
 date     10/17/2007
 modified 11/03/2007

=head1 DESCRIPTION

 spread_stats.pl  - A tool to examine NFL point spreads. 
 This program takes data from a MySQL database and does analysis of team
 strength based on record and point spread.

=head1 COPYRIGHT

    This program is copyrighted 2007 by David Myers. All rights are
    reserved.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

=cut

use DBI;
use Config::Simple;
use Getopt::Long;
use Pod::Usage;
use Statistics::Descriptive;

my $help  = 0;
my $man   = 0;
my $range = 0;

GetOptions(
    'help|?'  => \$help,
    man       => \$man,
    "range=s" => \$range,
) or pod2usage(2);

pod2usage( -exitval => 0, -verbose => 1 ) if $help;
pod2usage( -exitval => 0, -verbose => 2 ) if $man;

my $year        = shift || 2007;
my $config_file = shift || 'nfl.config';

my %db_hash;
Config::Simple->import_from( $config_file, \%db_hash );

my $db    = "nfl_$year";
my $user  = $db_hash{"${db}.user"};
my $pass  = $db_hash{"${db}.pass"};
my $table = $db_hash{"${db}.table"} || "games";

my %teams = ();
my %stats = (
    games_played     => 0,
    wins             => 0,
    win_pct          => 0.0,
    losses           => 0,
    ties             => 0,
    points_for       => 0,
    points_against   => 0,
    avg_pts_for      => 0.0,
    avg_pts_against  => 0.0,
    point_spread     => 0,
    avg_point_spread => 0.0,
);
my %totals = (
    games        => 0,
    win_score    => 0,
    losing_score => 0,
    home_wins    => 0,
    visit_wins   => 0,
    win_margin   => 0,
);

my $dbh = DBI->connect( "DBI:mysql:$db", "$user", "$pass" )
  or die("Cannot connect to the database");
my $sql = "SELECT visitor, visit_score, home, home_score from $table";
if ($range) {
    if ( $range =~ /-/ ) {
        my ( $start_week, $end_week ) = split /-/, $range, 2;
        if ( $start_week =~ /\d\d?/ and $end_week =~ /\d\d?/ ) {
            $start_week = sanitize($start_week);
            $end_week   = sanitize($end_week);
            $sql =
                "SELECT visitor, visit_score, "
              . "home, home_score from $table "
              . "where week >= $start_week and week <= $end_week";
        }
    }
}
my $sth = $dbh->prepare($sql);
$sth->execute;
while ( my ( $visitor, $visit_score, $home_team, $home_score ) =
    $sth->fetchrow_array() )
{
    $totals{games}++;
    my $diff = $visit_score - $home_score;
    $teams{$visitor}   = {%stats} unless $teams{$visitor};
    $teams{$home_team} = {%stats} unless $teams{$home_team};
    $teams{$visitor}{games_played}++;
    $teams{$home_team}{games_played}++;
    $teams{$visitor}{point_spread} += $diff;
    $teams{$home_team}{point_spread} -= $diff;
    $teams{$visitor}{points_for}       += $visit_score;
    $teams{$visitor}{points_against}   += $home_score;
    $teams{$home_team}{points_for}     += $home_score;
    $teams{$home_team}{points_against} += $visit_score;
    push @{$teams{$visitor}{arr_point_spread}}, $diff;
    push @{$teams{$home_team}{arr_point_spread}}, -$diff;
    push @{$teams{$visitor}{arr_points_for}}, $visit_score;
    push @{$teams{$visitor}{arr_points_against}}, $home_score;
    push @{$teams{$home_team}{arr_points_for}}, $home_score;
    push @{$teams{$home_team}{arr_points_against}}, $visit_score;

    if ( $diff > 0 ) {
        $teams{$visitor}{wins}++;
        $teams{$home_team}{losses}++;
        $totals{win_score}    += $visit_score;
        $totals{losing_score} += $home_score;
        $totals{win_margin}   += $diff;
        $totals{visit_wins}++;
    }
    elsif ( $diff == 0 ) {
        $teams{$visitor}{ties}++;
        $teams{$home_team}{ties}++;
    }
    else {
        $teams{$visitor}{losses}++;
        $teams{$home_team}{wins}++;
        $totals{win_score}    += $home_score;
        $totals{losing_score} += $visit_score;
        $totals{win_margin}   += abs($diff);
        $totals{home_wins}++;
    }
}
$dbh->disconnect;

print "Global Statistics:\n";
my $avg_win    = $totals{win_score} * 1.0 / $totals{games};
my $avg_loss   = $totals{losing_score} * 1.0 / $totals{games};
my $avg_margin = $totals{win_margin} * 1.0 / $totals{games};
print "Games  Home Wins Winning_Score Losing_Score Margin\n";
printf "%-4d     %4d       %6.2f        %6.2f    %6.2f\n", $totals{games},
  $totals{home_wins}, $avg_win, $avg_loss, $avg_margin;

for ( sort keys %teams ) {
    $teams{$_}{win_pct} =
      ( 100.0 * $teams{$_}{wins} ) / $teams{$_}{games_played};
    $teams{$_}{avg_point_spread} =
      ( $teams{$_}{point_spread} * 1.0 ) / $teams{$_}{games_played};
    $teams{$_}{avg_pts_for} =
      ( $teams{$_}{points_for} * 1.0 ) / $teams{$_}{games_played};
    $teams{$_}{avg_pts_against} =
      ( $teams{$_}{points_against} * 1.0 ) / $teams{$_}{games_played};
}

my $rank = 0;
print "\n\nRank  Team    Spread   Median    StdErr TrMean \n";
print "---------------------------------------------------\n";
for (
    sort {
             $teams{$b}{win_pct} <=> $teams{$a}{win_pct}
          or $teams{$b}{avg_point_spread} <=> $teams{$a}{avg_point_spread}
          or $teams{$b}{avg_pts_for} <=> $teams{$a}{avg_pts_for}
          or $teams{$a}{avg_pts_against} <=> $teams{$b}{avg_pts_against}
    }
    keys %teams
  )
{
    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@{$teams{$_}{arr_point_spread}});
    my $mean = $stat->mean();
    my $median = $stat->median();
    my $stderr = $stat->standard_deviation();
    my $trim = $stat->trimmed_mean(0.25);
    $rank++;
    printf "%-2d    %-3s    %6.2f   %5.1f   +/-%6.2f %6.2f \n", 
$rank, $_, $mean, $median, $stderr, $trim;
}

sub sanitize {
    my $num = shift;
    $num = 1  if ( $num < 1 );
    $num = 21 if ( $num > 21 );
    return $num;
}