• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/local/bin/perl
2#  ********************************************************************
3#  * COPYRIGHT:
4#  * © 2016 and later: Unicode, Inc. and others.
5#  * License & terms of use: http://www.unicode.org/copyright.html#License
6#  * Copyright (c) 2006, International Business Machines Corporation and
7#  * others. All Rights Reserved.
8#  ********************************************************************
9
10package Dataset;
11use Statistics::Descriptive;
12use Statistics::Distributions;
13use strict;
14
# Create a new Dataset with the given data.
#
# Usage:   my $ds = Dataset->new(@samples);
# Params:  $class   - class name to bless into
#          @samples - zero or more numeric data points
# Returns: a blessed hash with the keys _data (array ref holding a private
#          copy of the samples), _scale (1.0), _mean and _error.
#
# With no samples, _mean and _error stay 0.0.  With one sample only the
# mean is computed.  With two or more, _error is also computed.
sub new {
    # Copy the arguments: the elements of @_ alias the caller's scalars, so
    # keeping a reference to @_ itself would let later writes through
    # _data modify the caller's variables.
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            # 0.005 is presumably the upper-tail probability, giving the
            # two-sided 99% bound mentioned at getError().
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);

            # NOTE(review): this multiplies by the standard deviation, not
            # by the standard error (there is no division by sqrt($n)) --
            # confirm this is the intended meaning of "error".
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}
44
# Replace the scaling factor applied to all reported values.
# A factor of 1.0 means no scaling.  The factor is expected to be > 0;
# this is not checked here.
# Returns the new scaling factor.
sub setScale {
    my $self  = shift;
    my $scale = shift;

    $self->{_scale} = $scale;
    return $self->{_scale};
}
51
# Multiply the current scaling factor by the given value.
# Returns the updated scaling factor.
sub scaleBy {
    my $self   = shift;
    my $factor = shift;

    my $updated = $self->{_scale} * $factor;
    $self->{_scale} = $updated;
    return $self->{_scale};
}
57
# Return the mean, with the scaling factor applied.
sub getMean {
    my ($self) = @_;

    my $scaled_mean = $self->{_scale} * $self->{_mean};
    return $scaled_mean;
}
63
# Return the 99% error (based on the t distribution), with the scaling
# factor applied.  The dataset is described as getMean() +/- getError().
sub getError {
    my ($self) = @_;

    my $scaled_error = $self->{_scale} * $self->{_error};
    return $scaled_error;
}
70
# Divide this Dataset by another and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# The smallest ratio is (lower bound of self) / (upper bound of rhs) and
# the largest is (upper bound of self) / (lower bound of rhs); the result
# is centred between the two, with the half-width as its error.  The
# result's scale is the quotient of the two scales.
#
# NOTE(review): this pairing of bounds looks like it assumes both
# intervals are strictly positive -- confirm against callers.
sub divide {
    my ($self, $rhs) = @_;

    my ($num, $num_err) = @{$self}{qw(_mean _error)};
    my ($den, $den_err) = @{$rhs}{qw(_mean _error)};

    my $lowest  = ($num - $num_err) / ($den + $den_err);
    my $highest = ($num + $num_err) / ($den - $den_err);

    my $quotient = Dataset->new();
    my $midpoint = ($lowest + $highest) / 2;
    $quotient->{_mean}  = $midpoint;
    $quotient->{_error} = $midpoint - $lowest;
    $quotient->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $quotient;
}
88
# Subtract another Dataset from this one and return a new one,
# maintaining the mean+/-error.  The new Dataset has no data points.
#
# Params:  $rhs - the Dataset to subtract
# Returns: a new Dataset whose mean is the difference of the means and
#          whose error is the sum of the two errors.
#
# When both operands share the same scale, the raw values are combined
# and that scale is kept.  When the scales differ, each operand's scale
# is first folded into its values and the result gets scale 1.0, so the
# scaled result equals the difference of the two scaled operands.
sub subtract {
    my ($self, $rhs) = @_;

    my $result = Dataset->new();

    if ($self->{_scale} == $rhs->{_scale}) {
        $result->{_mean}  = $self->{_mean} - $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # The raw values are in different units; combine scaled values.
        my ($ls, $rs) = ($self->{_scale}, $rhs->{_scale});
        $result->{_mean}  = $self->{_mean} * $ls - $rhs->{_mean} * $rs;
        $result->{_error} = $self->{_error} * $ls + $rhs->{_error} * $rs;
        $result->{_scale} = 1.0;
    }

    return $result;
}
101
# Add another Dataset to this one and return a new one, maintaining
# the mean+/-error.  The new Dataset has no data points.
#
# Params:  $rhs - the Dataset to add
# Returns: a new Dataset whose mean is the sum of the means and whose
#          error is the sum of the two errors.
#
# When both operands share the same scale, the raw values are combined
# and that scale is kept.  When the scales differ, each operand's scale
# is first folded into its values and the result gets scale 1.0, so the
# scaled result equals the sum of the two scaled operands.
sub add {
    my ($self, $rhs) = @_;

    my $result = Dataset->new();

    if ($self->{_scale} == $rhs->{_scale}) {
        $result->{_mean}  = $self->{_mean} + $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # The raw values are in different units; combine scaled values.
        my ($ls, $rs) = ($self->{_scale}, $rhs->{_scale});
        $result->{_mean}  = $self->{_mean} * $ls + $rhs->{_mean} * $rs;
        $result->{_error} = $self->{_error} * $ls + $rhs->{_error} * $rs;
        $result->{_scale} = 1.0;
    }

    return $result;
}
114
# Divide a Dataset by a scalar and return a new one.
# The new Dataset has no data points.
#
# Params:  $s - the divisor (must be non-zero)
# Returns: a new Dataset with the mean divided by $s, the error divided
#          by the magnitude of $s, and the same scale as this Dataset.
sub divideByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} / $s;
    # The error is a half-width, so it must stay non-negative even when
    # the divisor is negative.
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
127
# Multiply a Dataset by a scalar and return a new one.
# The new Dataset has no data points.
#
# Params:  $s - the multiplier
# Returns: a new Dataset with the mean multiplied by $s, the error
#          multiplied by the magnitude of $s, and the same scale as this
#          Dataset.
sub multiplyByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} * $s;
    # The error is a half-width, so it must stay non-negative even when
    # the multiplier is negative.
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
140
1411;
142