#!/usr/local/bin/perl
#  ********************************************************************
#  * COPYRIGHT:
#  * © 2016 and later: Unicode, Inc. and others.
#  * License & terms of use: http://www.unicode.org/copyright.html
#  * Copyright (c) 2006, International Business Machines Corporation and
#  * others. All Rights Reserved.
#  ********************************************************************

package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use strict;
use warnings;

# Create a new Dataset with the given data.
#
# Arguments: the class name followed by zero or more numeric samples.
# Returns:   a blessed hash with the fields
#              _data  - array ref holding a private copy of the samples
#              _scale - multiplier applied by getMean()/getError(); 1.0
#              _mean  - mean of the samples (0.0 when there are none)
#              _error - t value * standard deviation (0.0 when fewer
#                       than two samples are given)
sub new {
    my $class = shift;
    my $self = bless {
        # Copy the samples rather than storing \@_: the elements of @_
        # are aliases of the caller's variables, so keeping a reference
        # to @_ would let later changes in the caller alter this object.
        _data  => [@_],
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = @{$self->{_data}};

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@{$self->{_data}});
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            # The 0.005 passed to tdistr goes with the 99% figure quoted
            # for getError().
            # NOTE(review): the t value is multiplied by the sample
            # standard deviation, not by the standard error of the mean
            # (sd / sqrt(n)) -- confirm this is the intended bound.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}

# Set a scaling factor for all data; 1.0 means no scaling.
# Scale must be > 0 (the value is stored as given, without checking).
# Returns the scale that was stored.
sub setScale {
    my ($self, $scale) = @_;
    return $self->{_scale} = $scale;
}

# Multiply the scaling factor by a value.
# Returns the updated scale.
sub scaleBy {
    # Named $factor rather than $a so the sort() special variable is not
    # shadowed by a lexical.
    my ($self, $factor) = @_;
    return $self->{_scale} *= $factor;
}

# Return the mean, multiplied by the current scale.
sub getMean {
    my $self = shift;
    return $self->{_mean} * $self->{_scale};
}

# Return a 99% error based on the t distribution, multiplied by the
# current scale. The dataset is described as getMean() +/- getError().
sub getError {
    my $self = shift;
    return $self->{_error} * $self->{_scale};
}

# Divide two Datasets and return a new one, maintaining the
# mean+/-error. The new Dataset has no data points.
sub divide {
    # Arguments: $self (numerator Dataset) and $rhs (denominator Dataset).
    # Returns a new Dataset whose scale is the ratio of the two scales.
    my $self = shift;
    my $rhs = shift;

    # Interval arithmetic on the raw (unscaled) values: the smallest ratio
    # is lowest numerator / highest denominator, the largest is the
    # reverse.
    # NOTE(review): this presumes $rhs->{_mean} - $rhs->{_error} is
    # positive; a zero value makes Perl die with a division-by-zero error
    # and a negative one yields a meaningless interval -- confirm callers
    # never pass such data.
    my $minratio = ($self->{_mean} - $self->{_error}) /
                   ($rhs->{_mean} + $rhs->{_error});
    my $maxratio = ($self->{_mean} + $self->{_error}) /
                   ($rhs->{_mean} - $rhs->{_error});

    # Centre the result between the two extremes.
    my $result = Dataset->new();
    $result->{_mean} = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}

# Private helper: factor that converts the raw values of $rhs into the
# raw units of $self (rhs scale / self scale). Returns exactly 1 when
# both scales are equal, so same-scale arithmetic is left untouched.
# Scales are documented as > 0; a zero $self scale that differs from the
# $rhs scale makes Perl die with a division-by-zero error.
sub _rescaleFactor {
    my ($self, $rhs) = @_;
    return 1 if $self->{_scale} == $rhs->{_scale};
    return $rhs->{_scale} / $self->{_scale};
}

# Subtracts two Datasets and returns a new one, maintaining the
# mean+/-error. The new Dataset has no data points and keeps the scale
# of $self; $rhs is converted to that scale first, so that
# getMean() of the result equals $self->getMean() - $rhs->getMean().
sub subtract {
    my $self = shift;
    my $rhs = shift;

    my $factor = _rescaleFactor($self, $rhs);

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} - $rhs->{_mean} * $factor;
    # Errors accumulate for a difference just as they do for a sum.
    $result->{_error} = $self->{_error} + $rhs->{_error} * $factor;
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Adds two Datasets and returns a new one, maintaining the
# mean+/-error. The new Dataset has no data points and keeps the scale
# of $self; $rhs is converted to that scale first, so that
# getMean() of the result equals $self->getMean() + $rhs->getMean().
sub add {
    my $self = shift;
    my $rhs = shift;

    my $factor = _rescaleFactor($self, $rhs);

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} + $rhs->{_mean} * $factor;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $factor;
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Divides a dataset by a scalar.
# The new Dataset has no data points and keeps the scale of $self.
# A zero scalar makes Perl die with a division-by-zero error.
sub divideByScalar {
    my $self = shift;
    my $s = shift;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} / $s;
    # The error is a magnitude: a negative scalar flips the sign of the
    # mean but must not produce a negative error.
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Multiplies a dataset by a scalar.
# The new Dataset has no data points and keeps the scale of $self.
sub multiplyByScalar {
    my $self = shift;
    my $s = shift;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} * $s;
    # The error is a magnitude: a negative scalar flips the sign of the
    # mean but must not produce a negative error.
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

1;