1import { datasetName, histogramPoints } from "../transforms/standard-mappers.js";
2import type { Series } from "../types/chart.js";
3import type { ChartData, Metrics } from "../types/data.js";
4
export class StatService {
  /**
   * Builds the chart series for every active dataset.
   *
   * For each metric whose dataset name is contained in `activeDatasets`, the
   * first entry of `metric.data` is used as the reference and every following
   * entry is compared against it. Each comparison yields a "line" series with
   * the points returned by `histogramPoints` and, when `histogramPoints`
   * returns a non-empty `pPlots` list, an additional "bar" series whose label
   * carries the returned `p` value.
   *
   * Sampled metrics are emitted before standard metrics.
   *
   * @param metrics Metrics holding optional `sampled` and `standard` lists.
   * @param activeDatasets Names of the datasets to include.
   * @returns The generated series; empty when no dataset is active.
   */
  pSeries(metrics: Metrics<number>, activeDatasets: Set<string>): Series[] {
    if (activeDatasets.size <= 0) {
      return [];
    }

    const series: Series[] = [];

    const sampled = metrics.sampled;
    if (sampled) {
      for (const metric of sampled) {
        const name = datasetName(metric);
        if (!activeDatasets.has(name)) {
          continue;
        }
        const data: Record<string, ChartData<number[]>> = metric.data;
        series.push(
          ...this.comparisonSeries(`${name} { ${metric.label} }`, data, (reference, target) =>
            this.buildDistribution(reference, target)
          )
        );
      }
    }

    const standard = metrics.standard;
    if (standard) {
      for (const metric of standard) {
        const name = datasetName(metric);
        if (!activeDatasets.has(name)) {
          continue;
        }
        const data: Record<string, ChartData<number>> = metric.data;
        series.push(
          ...this.comparisonSeries(`${name} { ${metric.label} }`, data, (reference, target) =>
            this.buildStandardDistribution(reference, target)
          )
        );
      }
    }

    return series;
  }

  /**
   * Compares the first entry of `data` against every following entry and
   * returns the resulting series, in entry order.
   *
   * @param title Label prefix shared by all series produced for this metric.
   * @param data Chart data keyed by name; the first value is the reference.
   * @param build Produces `[observed delta, resampled deltas]` for a pair.
   * @returns One "line" series per comparison plus an optional "bar" series.
   */
  private comparisonSeries<T>(
    title: string,
    data: Record<string, ChartData<T>>,
    build: (reference: ChartData<T>, target: ChartData<T>) => [number, number[]]
  ): Series[] {
    const result: Series[] = [];
    const comparables = Object.values(data);
    const reference = comparables[0];
    if (!reference) {
      return result;
    }

    for (const target of comparables.slice(1)) {
      if (!target) {
        continue;
      }
      const [delta, distribution] = build(reference, target);
      const [points, pPlots, p] = histogramPoints([distribution], /* buckets */ 100, /* target */ delta);
      result.push({
        descriptiveLabel: `${title} - Likelihood`,
        type: "line",
        data: points,
        options: {
          tension: 0.3
        }
      });
      if (pPlots && pPlots.length > 0) {
        result.push({
          descriptiveLabel: `${title} - { P = ${p} }`,
          type: "bar",
          data: pPlots,
          options: {
            tension: 0.01
          }
        });
      }
    }
    return result;
  }

  /**
   * Runs a permutation resampling on the difference of means.
   *
   * @param reference Reference values.
   * @param target Values compared against the reference.
   * @param N Number of resampling rounds.
   * @returns `[observed mean delta, mean delta of each round]`, where every
   *          delta is `reference - target`.
   */
  private buildStandardDistribution(
    reference: ChartData<number>,
    target: ChartData<number>,
    N: number = 100_000): [number, number[]] {
    // Observed difference of means.
    const referenceData = reference.values;
    const targetData = target.values;
    const deltaMean = this.mean(referenceData) - this.mean(targetData);

    // Simulate: pool both groups, then repeatedly re-split the shuffled pool
    // into groups of the original sizes.
    const sizes = [referenceData.length, targetData.length];
    const combined: number[] = [...referenceData, ...targetData];
    const means: number[] = [];
    for (let i = 0; i < N; i += 1) {
      const [r, t] = this.shuffleSplit(combined, sizes);
      means.push(this.mean(r) - this.mean(t));
    }
    return [deltaMean, means];
  }

  /**
   * Runs a permutation resampling on the difference of medians for sampled
   * data, where every value is itself a list of numbers.
   *
   * Whole lists are shuffled between the groups; the median of a group is
   * taken over all numbers of its lists (see `arrayMedian`).
   *
   * @param reference Reference lists.
   * @param target Lists compared against the reference.
   * @param N Number of resampling rounds.
   * @returns `[observed median delta, median delta of each round]`, where
   *          every delta is `reference - target`.
   */
  private buildDistribution(
    reference: ChartData<number[]>,
    target: ChartData<number[]>,
    N: number = 1_000
  ): [number, number[]] {
    // Observed difference of medians.
    const referenceData = reference.values;
    const targetData = target.values;
    const deltaMedian = this.arrayMedian(referenceData) - this.arrayMedian(targetData);

    // Simulate: pool both groups, then repeatedly re-split the shuffled pool
    // into groups of the original sizes.
    const sizes = [referenceData.length, targetData.length];
    const combined: number[][] = [...referenceData, ...targetData];
    const medians: number[] = [];
    for (let i = 0; i < N; i += 1) {
      const [r, t] = this.shuffleSplit(combined, sizes);
      medians.push(this.arrayMedian(r) - this.arrayMedian(t));
    }
    return [deltaMedian, medians];
  }

  /**
   * Shuffles a copy of `data` and cuts it into consecutive chunks.
   *
   * @param data Items to distribute; not modified.
   * @param sizes Requested chunk lengths, in order.
   * @returns One chunk per entry of `sizes`. A chunk is shorter than requested
   *          (possibly empty) when the shuffled data runs out.
   */
  private shuffleSplit<T>(data: T[], sizes: number[]): T[][] {
    const shuffled = this.shuffle(data);
    const splits: T[][] = [];
    let index = 0;
    for (const size of sizes) {
      // Clamp so that a non-positive size yields an empty chunk.
      const start = Math.max(index, 0);
      const end = Math.max(index + size, start);
      splits.push(shuffled.slice(start, end));
      index += size;
    }
    return splits;
  }

  /**
   * Median over all numbers contained in the given lists.
   */
  private arrayMedian(data: number[][]): number {
    // We don't want to compute median of medians here.
    // This is because while individual runs are correlated
    // we can still look at the actual metrics in aggregate.
    return this.median(data.flat());
  }

  /**
   * Arithmetic mean of `data`.
   *
   * @returns The mean, or 0 for empty input.
   */
  private mean(data: number[]): number {
    if (data.length <= 0) return 0;
    let sum = 0;
    for (const value of data) {
      sum += value;
    }
    return (sum / data.length);
  }

  /**
   * Median of `data`; the input is not modified.
   *
   * @returns The middle value for odd lengths, the average of the two middle
   *          values for even lengths, or 0 for empty input (matching `mean`).
   */
  private median(data: number[]): number {
    if (data.length <= 0) return 0;
    const sorted = [...data];
    // Default comparator coerces types to string !
    sorted.sort((a, b) => a - b); // in-place
    const middle = Math.trunc(sorted.length / 2);
    if (sorted.length % 2 === 1) {
      return sorted[middle];
    }
    return (sorted[middle - 1] + sorted[middle]) / 2;
  }

  /**
   * Returns a uniformly shuffled copy of `data` (Fisher-Yates).
   *
   * @param data Items to shuffle; not modified.
   * @param multiplier Number of shuffle passes, rounded up. One pass already
   *                   gives a uniform permutation; a value of 0 or less
   *                   returns an unshuffled copy.
   * @returns A new array holding the same items.
   */
  private shuffle<T>(data: T[], multiplier: number = 1): T[] {
    if (data.length <= 0) {
      return [];
    }

    const copy = [...data];
    const passes = Math.ceil(multiplier);
    for (let pass = 0; pass < passes; pass += 1) {
      for (let i = copy.length - 1; i > 0; i -= 1) {
        // Math.floor over [0, i + 1) makes every index 0..i equally likely.
        const j = Math.floor(Math.random() * (i + 1));
        const held = copy[i];
        copy[i] = copy[j];
        copy[j] = held;
      }
    }
    return copy;
  }

}
210