#/**
# * © 2016 and later: Unicode, Inc. and others.
# * License & terms of use: http://www.unicode.org/copyright.html
# *******************************************************************************
# * Copyright (C) 2002-2004, International Business Machines Corporation and   *
# * others. All Rights Reserved.                                               *
# *******************************************************************************
# */
package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use strict;
use warnings;

# A Dataset summarizes a list of numeric samples as mean +/- error,
# optionally multiplied by a scaling factor.  The object is a blessed
# hash with these private fields:
#   _data   array ref holding the samples passed to new()
#   _scale  multiplier applied by getMean() and getError()
#   _mean   unscaled mean of the samples (0.0 when there are none)
#   _error  unscaled error half-width (0.0 with fewer than two samples)

# Create a new Dataset with the given data.
#
# Usage:   my $ds = Dataset->new(@samples);
# Returns: the new Dataset object.
#
# With no samples, the mean and error both stay at 0.0.  With exactly
# one sample the mean is set but the error stays at 0.0, because a
# standard deviation cannot be estimated from a single point.
sub new {
    # Unpacking @_ into @samples copies the values.  Storing a reference
    # to @_ itself would keep aliases to the caller's variables, so a
    # later change on the caller's side would silently alter _data.
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = scalar @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            #
            # The critical value is taken for n-1 degrees of freedom at
            # an upper-tail probability of 0.005 (99% two-sided), and it
            # multiplies the standard deviation of the samples as
            # reported by Statistics::Descriptive.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}

# Set a scaling factor for all data; 1.0 means no scaling.
# Scale must be > 0.  (This is the caller's responsibility; the value
# is stored as given, without validation.)
# Returns the newly stored scale.
sub setScale {
    my ($self, $scale) = @_;
    return $self->{_scale} = $scale;
}

# Multiply the scaling factor by a value.
# Returns the resulting scale.
sub scaleBy {
    # Named $factor rather than $a: $a is the package variable used by
    # sort blocks and should not be shadowed by a lexical.
    my ($self, $factor) = @_;
    return $self->{_scale} *= $factor;
}

# Return the mean, multiplied by the current scaling factor.
sub getMean {
    my ($self) = @_;
    return $self->{_mean} * $self->{_scale};
}

# Return a 99% error based on the t distribution, multiplied by the
# current scaling factor.  The dataset is described as
# getMean() +/- getError().
sub getError {
    my ($self) = @_;
    return $self->{_error} * $self->{_scale};
}

# Divide two Datasets and return a new one, maintaining the
# mean+/-error. The new Dataset has no data points.
sub divide {
    # Usage:   my $ratio = $numerator->divide($denominator);
    # Returns: a new Dataset (built with no data points) whose mean is
    #          the midpoint of the two extreme ratios below, whose error
    #          is the non-negative half-width between them, and whose
    #          scale is the quotient of the two operands' scales.
    # Dies:    when either bound of the divisor's mean +/- error interval
    #          is exactly zero.
    my ($self, $rhs) = @_;

    # Bounds of the divisor's interval, computed on the unscaled values.
    my $rhs_high = $rhs->{_mean} + $rhs->{_error};
    my $rhs_low  = $rhs->{_mean} - $rhs->{_error};

    # Perl would die on these divisions anyway; croak so the message
    # names this method and is reported at the caller's location.
    if ($rhs_high == 0 || $rhs_low == 0) {
        require Carp;
        Carp::croak('Illegal division by zero in Dataset::divide: '
            . 'divisor mean +/- error has a zero bound');
    }

    # Lowest numerator over highest divisor, and highest numerator over
    # lowest divisor.
    my $minratio = ($self->{_mean} - $self->{_error}) / $rhs_high;
    my $maxratio = ($self->{_mean} + $self->{_error}) / $rhs_low;

    my $result = Dataset->new();
    $result->{_mean} = ($minratio + $maxratio) / 2;

    # abs() keeps the error non-negative when the two ratios come out in
    # reverse order (for example with a negative numerator).
    $result->{_error} = abs($result->{_mean} - $minratio);
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};

    return $result;
}

1;