• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- Analyze benchmark JSON files --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This code analyzes the json file produced by the `automemcpy` binary.
9 //
10 // As a remainder, `automemcpy` will benchmark each autogenerated memory
11 // functions against one of the predefined distributions available in the
12 // `libc/benchmarks/distributions` folder.
13 //
14 // It works as follows:
15 // - Reads one or more json files.
16 // - If there are several runs for the same function and distribution, picks the
17 //   median throughput (aka `BytesPerSecond`).
18 // - Aggregates the throughput per distributions and scores them from worst (0)
19 //   to best (1).
20 // - Each distribution categorizes each function into one of the following
21 //   categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
22 //   BAD.
23 // - A process similar to the Majority Judgment voting system is used to `elect`
24 //   the best function. The histogram of grades is returned so we can
25 //   distinguish between functions with the same final grade. In the following
26 //   example both functions grade EXCELLENT but we may prefer the second one.
27 //
28 //   |            | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
29 //   |------------|-----------|-----------|------|----------| ...
30 //   | Function_1 |     7     |     1     |   2  |          | ...
31 //   | Function_2 |     6     |     4     |      |          | ...
32 
33 #include "automemcpy/ResultAnalyzer.h"
34 #include "llvm/ADT/StringRef.h"
35 #include <numeric>
36 #include <unordered_map>
37 
38 namespace llvm {
39 
40 namespace automemcpy {
41 
getString(const GradeEnum & GE)42 StringRef Grade::getString(const GradeEnum &GE) {
43   switch (GE) {
44   case EXCELLENT:
45     return "EXCELLENT";
46   case VERY_GOOD:
47     return "VERY_GOOD";
48   case GOOD:
49     return "GOOD";
50   case PASSABLE:
51     return "PASSABLE";
52   case INADEQUATE:
53     return "INADEQUATE";
54   case MEDIOCRE:
55     return "MEDIOCRE";
56   case BAD:
57     return "BAD";
58   case ARRAY_SIZE:
59     report_fatal_error("logic error");
60   }
61 }
62 
judge(double Score)63 Grade::GradeEnum Grade::judge(double Score) {
64   if (Score >= 6. / 7)
65     return EXCELLENT;
66   if (Score >= 5. / 7)
67     return VERY_GOOD;
68   if (Score >= 4. / 7)
69     return GOOD;
70   if (Score >= 3. / 7)
71     return PASSABLE;
72   if (Score >= 2. / 7)
73     return INADEQUATE;
74   if (Score >= 1. / 7)
75     return MEDIOCRE;
76   return BAD;
77 }
78 
computeUnbiasedSampleVariance(const std::vector<double> & Samples,const double SampleMean)79 static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
80                                             const double SampleMean) {
81   assert(!Samples.empty());
82   if (Samples.size() == 1)
83     return 0;
84   double DiffSquaresSum = 0;
85   for (const double S : Samples) {
86     const double Diff = S - SampleMean;
87     DiffSquaresSum += Diff * Diff;
88   }
89   return DiffSquaresSum / (Samples.size() - 1);
90 }
91 
processPerDistributionData(PerDistributionData & Data)92 static void processPerDistributionData(PerDistributionData &Data) {
93   auto &Samples = Data.BytesPerSecondSamples;
94   assert(!Samples.empty());
95   // Sample Mean
96   const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
97   Data.BytesPerSecondMean = Sum / Samples.size();
98   // Unbiased Sample Variance
99   Data.BytesPerSecondVariance =
100       computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
101   // Median
102   const size_t HalfSize = Samples.size() / 2;
103   std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
104   Data.BytesPerSecondMedian = Samples[HalfSize];
105 }
106 
getThroughputs(ArrayRef<Sample> Samples)107 std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
108   std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
109   for (const auto &S : Samples) {
110     if (S.Type != SampleType::ITERATION)
111       break;
112     auto &Function = Functions[S.Id.Function];
113     auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
114     Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
115   }
116 
117   std::vector<FunctionData> Output;
118   for (auto &[FunctionId, Function] : Functions) {
119     Function.Id = FunctionId;
120     for (auto &Pair : Function.PerDistributionData)
121       processPerDistributionData(Pair.second);
122     Output.push_back(std::move(Function));
123   }
124   return Output;
125 }
126 
fillScores(MutableArrayRef<FunctionData> Functions)127 void fillScores(MutableArrayRef<FunctionData> Functions) {
128   // A key to bucket throughput per function type and distribution.
129   struct Key {
130     FunctionType Type;
131     StringRef Distribution;
132 
133     COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
134   };
135 
136   // Tracks minimum and maximum values.
137   struct MinMax {
138     double Min = std::numeric_limits<double>::max();
139     double Max = std::numeric_limits<double>::min();
140     void update(double Value) {
141       if (Value < Min)
142         Min = Value;
143       if (Value > Max)
144         Max = Value;
145     }
146     double normalize(double Value) const { return (Value - Min) / (Max - Min); }
147   };
148 
149   std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
150   for (const auto &Function : Functions) {
151     const FunctionType Type = Function.Id.Type;
152     for (const auto &Pair : Function.PerDistributionData) {
153       const auto &Distribution = Pair.getKey();
154       const double Throughput = Pair.getValue().BytesPerSecondMedian;
155       const Key K{Type, Distribution};
156       ThroughputMinMax[K].update(Throughput);
157     }
158   }
159 
160   for (auto &Function : Functions) {
161     const FunctionType Type = Function.Id.Type;
162     for (const auto &Pair : Function.PerDistributionData) {
163       const auto &Distribution = Pair.getKey();
164       const double Throughput = Pair.getValue().BytesPerSecondMedian;
165       const Key K{Type, Distribution};
166       Function.PerDistributionData[Distribution].Score =
167           ThroughputMinMax[K].normalize(Throughput);
168     }
169   }
170 }
171 
castVotes(MutableArrayRef<FunctionData> Functions)172 void castVotes(MutableArrayRef<FunctionData> Functions) {
173   for (FunctionData &Function : Functions) {
174     Function.ScoresGeoMean = 1.0;
175     for (const auto &Pair : Function.PerDistributionData) {
176       const StringRef Distribution = Pair.getKey();
177       const double Score = Pair.getValue().Score;
178       Function.ScoresGeoMean *= Score;
179       const auto G = Grade::judge(Score);
180       ++(Function.GradeHisto[G]);
181       Function.PerDistributionData[Distribution].Grade = G;
182     }
183   }
184 
185   for (FunctionData &Function : Functions) {
186     const auto &GradeHisto = Function.GradeHisto;
187     const size_t Votes =
188         std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U);
189     const size_t MedianVote = Votes / 2;
190     size_t CountedVotes = 0;
191     Grade::GradeEnum MedianGrade = Grade::BAD;
192     for (size_t I = 0; I < GradeHisto.size(); ++I) {
193       CountedVotes += GradeHisto[I];
194       if (CountedVotes > MedianVote) {
195         MedianGrade = Grade::GradeEnum(I);
196         break;
197       }
198     }
199     Function.FinalGrade = MedianGrade;
200   }
201 }
202 
203 } // namespace automemcpy
204 } // namespace llvm
205