#/**
# *******************************************************************************
# * Copyright (C) 2002-2004, International Business Machines Corporation and    *
# * others. All Rights Reserved.                                                *
# *******************************************************************************
# */
package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use strict;
use warnings;
use Carp qw(croak);

# Create a new Dataset with the given data.
#
# Arguments: the class name followed by zero or more numeric samples.
# Returns:   a blessed hash holding a private copy of the samples (_data),
#            a scale factor of 1.0 (_scale), the sample mean (_mean) and
#            the error half-width (_error).
#
# With no samples both the mean and the error stay 0.0.  With exactly one
# sample the mean is set but the error stays 0.0, because the error is only
# computed when there are at least two samples.
sub new {
    my $class = shift;

    # Copy the samples: the elements of @_ alias the caller's variables, so
    # keeping a reference to @_ itself would let later changes made by the
    # caller leak into this Dataset.
    my @samples = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = scalar @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            #
            # 0.005 is the upper-tail probability, which corresponds to a
            # two-sided 99% interval with $n-1 degrees of freedom.
            #
            # NOTE(review): the critical value multiplies the sample
            # standard deviation, not the standard error of the mean
            # (s / sqrt(n)) -- confirm which interval is intended.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}

# Set a scaling factor for all data; 1.0 means no scaling.
# Scale must be > 0 (this is not checked here).
# Returns the new scaling factor.
sub setScale {
    my ($self, $scale) = @_;
    $self->{_scale} = $scale;
    return $self->{_scale};
}

# Multiply the scaling factor by a value.
# Returns the new scaling factor.
sub scaleBy {
    my ($self, $factor) = @_;
    $self->{_scale} *= $factor;
    return $self->{_scale};
}

# Return the mean, multiplied by the current scaling factor.
sub getMean {
    my ($self) = @_;
    return $self->{_mean} * $self->{_scale};
}

# Return a 99% error based on the t distribution, multiplied by the
# current scaling factor.  The dataset is described as
# getMean() +/- getError().
sub getError {
    my ($self) = @_;
    return $self->{_error} * $self->{_scale};
}

# Divide two Datasets and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# Arguments: $rhs, the Dataset to divide this one by.
# Returns:   a new Dataset whose unscaled mean is the midpoint of the
#            smallest and largest ratio obtainable from the two
#            mean+/-error ranges, whose error is the distance from that
#            midpoint to the smallest ratio, and whose scale is the
#            quotient of the two scales.
# Dies:      (via croak) when $rhs's mean+error or mean-error is 0, or
#            when $rhs's scale is 0, since either would divide by zero.
sub divide {
    my ($self, $rhs) = @_;

    my $rhs_low  = $rhs->{_mean} - $rhs->{_error};
    my $rhs_high = $rhs->{_mean} + $rhs->{_error};

    # Report a zero divisor from the caller's point of view instead of
    # letting Perl raise its generic "Illegal division by zero".
    croak "Dataset::divide: division by zero (rhs mean +/- error is 0)"
        if $rhs_low == 0 || $rhs_high == 0;
    croak "Dataset::divide: division by zero (rhs scale is 0)"
        if $rhs->{_scale} == 0;

    # Smallest ratio: low end of the numerator over high end of the
    # divisor; largest ratio: the opposite pairing.
    my $minratio = ($self->{_mean} - $self->{_error}) / $rhs_high;
    my $maxratio = ($self->{_mean} + $self->{_error}) / $rhs_low;

    my $result = Dataset->new();
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}

1;