#!/usr/bin/perl


use strict;
use warnings;

our $max = 10;
our $size = 10000;

our $mean  = 20;
our $stdev = 3;

# This function returns two numbers (0 to 1)
# from a uniform distribution

sub getUniformPair
  {
    my $a = rand(1);
    my $b = rand(1);
    return ($a,$b);
  }

# This function accepts two number (0 to 1) from a
# uniform distribution and returns two numbers from
# the standard normal distribution. (mean 0, variance 1)

sub getNormalPair
  {
    my $a = shift;
    my $b = shift;

    my $pi = 3.14159265359;

    # Box-Muller Transformation
    my $x = sqrt(-2 * log($a)) * cos(2*$pi*$b);
    my $y = sqrt(-2 * log($a)) * sin(2*$pi*$b);

    return ($x,$y);
  }

# convert the normal distribution to our 
# distribution for our sigma an mu
sub scale
  {
    my $x0 = shift;
    my $mu = shift;
    my $sigma = shift;

    my $x1 = $x0 * $sigma + $mu;

    return $x1;
  }

# this function will round a number to one 
# decimal place and add a + or - sign.
sub round
  {
    my $n = shift;
    $n = int($n*10)/10;
    $n = sprintf("%+1.1f",$n);
    return $n;
  }


my %count = ();


# since we generate two numbers each loop, we will only
# loop up to $size/2

for (my $i=0; $i<($size/2); $i++)
  {

    # obtain two random numbers
    my ($a,$b) = getUniformPair();

    # transform to normal pair
    ($a,$b) = getNormalPair($a,$b);

    # transform to our distribution
    $a = scale($a,$mean,$stdev);
    $b = scale($b,$mean,$stdev);

    # record the frequency for later analysis
    $count{round($a)}++;
    $count{round($b)}++;
  }

# print the frequency for $mean +/- 4*$stdev
for (my $i=10*($mean - 4*$stdev); $i<=10*($mean + 4*$stdev); $i++)
  {
    my $n = round($i/10);
    print $n . "\t" . ( $count{$n} || 0 ) . "\n";
  }

print "\n\n";

# Next we will test the distribution

# we will create a hash of buckets
# each bucket will represent one stdev
# the index will be -4,-3,-2,-1,0,1,2,3,4
my %bucket = ();

my $total = 0;

for (my $i=10*($mean - 4*$stdev); $i<=10*($mean + 4*$stdev); $i++)
  {
    my $n = round($i/10);

    # in which bucket should this be counted
    my $index = int(($n  - $mean)/$stdev);

    # truncate the $n to total the frequency for 
    # ranges represented by integers.  For example,
    # the frequency for 3.2 will be added in the 
    # bucket for 3.

    $count{$n} = 0 unless defined $count{$n};

    $bucket{$index} += $count{$n};

    # count the total (this should be 2 * $size)
    $total = $total + $count{$n};
    # print $n . "\t" . $count{$n} . "\n"; 
  }

# Now we can calcualte the percentage for each of these integer ranges.
# If this is indeed a normal distribution, we should see something
# similar to the following
# 
#  -4    
#  -2     2
#  -1    14
#   0    68
#  +1    14
#  +2     2
#  +3
# 

print "\n\nDo we see the 68-95-98 trend?\n\n";

for (my $i=-4; $i<4; $i++)
  {
    my $ratio =  int(0.5 + 100 * $bucket{$i}/$total);
    $ratio = sprintf("%2d",$ratio);
    print $i . "\t" . $bucket{$i} . "\t" . $ratio . " %\n";
  }