#!/usr/bin/perl use strict; use warnings; our $max = 10; our $size = 10000; our $mean = 20; our $stdev = 3; # This function returns two numbers (0 to 1) # from a uniform distribution sub getUniformPair { my $a = rand(1); my $b = rand(1); return ($a,$b); } # This function accepts two number (0 to 1) from a # uniform distribution and returns two numbers from # the standard normal distribution. (mean 0, variance 1) sub getNormalPair { my $a = shift; my $b = shift; my $pi = 3.14159265359; # Box-Muller Transformation my $x = sqrt(-2 * log($a)) * cos(2*$pi*$b); my $y = sqrt(-2 * log($a)) * sin(2*$pi*$b); return ($x,$y); } # convert the normal distribution to our # distribution for our sigma an mu sub scale { my $x0 = shift; my $mu = shift; my $sigma = shift; my $x1 = $x0 * $sigma + $mu; return $x1; } # this function will round a number to one # decimal place and add a + or - sign. sub round { my $n = shift; $n = int($n*10)/10; $n = sprintf("%+1.1f",$n); return $n; } my %count = (); # since we generate two numbers each loop, we will only # loop up to $size/2 for (my $i=0; $i<($size/2); $i++) { # obtain two random numbers my ($a,$b) = getUniformPair(); # transform to normal pair ($a,$b) = getNormalPair($a,$b); # transform to our distribution $a = scale($a,$mean,$stdev); $b = scale($b,$mean,$stdev); # record the frequency for later analysis $count{round($a)}++; $count{round($b)}++; } # print the frequency for $mean +/- 4*$stdev for (my $i=10*($mean - 4*$stdev); $i<=10*($mean + 4*$stdev); $i++) { my $n = round($i/10); print $n . "\t" . ( $count{$n} || 0 ) . "\n"; } print "\n\n"; # Next we will test the distribution # we will create a hash of buckets # each bucket will represent one stdev # the index will be -4,-3,-2,-1,0,1,2,3,4 my %bucket = (); my $total = 0; for (my $i=10*($mean - 4*$stdev); $i<=10*($mean + 4*$stdev); $i++) { my $n = round($i/10); # in which bucket should this be counted my $index = int(($n - $mean)/$stdev); # truncate the $n to total the frequency for # ranges represented by integers. For example, # the frequency for 3.2 will be added in the # bucket for 3. $count{$n} = 0 unless defined $count{$n}; $bucket{$index} += $count{$n}; # count the total (this should be 2 * $size) $total = $total + $count{$n}; # print $n . "\t" . $count{$n} . "\n"; } # Now we can calcualte the percentage for each of these integer ranges. # If this is indeed a normal distribution, we should see something # similar to the following # # -4 # -2 2 # -1 14 # 0 68 # +1 14 # +2 2 # +3 # print "\n\nDo we see the 68-95-98 trend?\n\n"; for (my $i=-4; $i<4; $i++) { my $ratio = int(0.5 + 100 * $bucket{$i}/$total); $ratio = sprintf("%2d",$ratio); print $i . "\t" . $bucket{$i} . "\t" . $ratio . " %\n"; }