1 // Copyright (c) 2017 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "stats_analyzer.h"
16
17 #include <algorithm>
18 #include <iostream>
19 #include <sstream>
20 #include <vector>
21
22 #include "source/enum_string_mapping.h"
23 #include "source/opcode.h"
24 #include "source/spirv_constant.h"
25 #include "spirv/1.1/spirv.h"
26
27 using libspirv::SpirvStats;
28
29 namespace {
30
GetVersionString(uint32_t word)31 std::string GetVersionString(uint32_t word) {
32 std::stringstream ss;
33 ss << "Version " << SPV_SPIRV_VERSION_MAJOR_PART(word)
34 << "." << SPV_SPIRV_VERSION_MINOR_PART(word);
35 return ss.str();
36 }
37
GetGeneratorString(uint32_t word)38 std::string GetGeneratorString(uint32_t word) {
39 return spvGeneratorStr(SPV_GENERATOR_TOOL_PART(word));
40 }
41
GetOpcodeString(uint32_t word)42 std::string GetOpcodeString(uint32_t word) {
43 return spvOpcodeString(static_cast<SpvOp>(word));
44 }
45
GetCapabilityString(uint32_t word)46 std::string GetCapabilityString(uint32_t word) {
47 return libspirv::CapabilityToString(static_cast<SpvCapability>(word));
48 }
49
50 template <class T>
KeyIsLabel(T key)51 std::string KeyIsLabel(T key) {
52 std::stringstream ss;
53 ss << key;
54 return ss.str();
55 }
56
57 template <class Key>
GetRecall(const std::unordered_map<Key,uint32_t> & hist,uint64_t total)58 std::unordered_map<Key, double> GetRecall(
59 const std::unordered_map<Key, uint32_t>& hist, uint64_t total) {
60 std::unordered_map<Key, double> freq;
61 for (const auto& pair : hist) {
62 const double frequency =
63 static_cast<double>(pair.second) / static_cast<double>(total);
64 freq.emplace(pair.first, frequency);
65 }
66 return freq;
67 }
68
69 template <class Key>
GetPrevalence(const std::unordered_map<Key,uint32_t> & hist)70 std::unordered_map<Key, double> GetPrevalence(
71 const std::unordered_map<Key, uint32_t>& hist) {
72 uint64_t total = 0;
73 for (const auto& pair : hist) {
74 total += pair.second;
75 }
76
77 return GetRecall(hist, total);
78 }
79
80 // Writes |freq| to |out| sorted by frequency in the following format:
81 // LABEL3 70%
82 // LABEL1 20%
83 // LABEL2 10%
84 // |label_from_key| is used to convert |Key| to label.
85 template <class Key>
WriteFreq(std::ostream & out,const std::unordered_map<Key,double> & freq,std::string (* label_from_key)(Key),double threshold=0.001)86 void WriteFreq(std::ostream& out, const std::unordered_map<Key, double>& freq,
87 std::string (*label_from_key)(Key), double threshold = 0.001) {
88 std::vector<std::pair<Key, double>> sorted_freq(freq.begin(), freq.end());
89 std::sort(sorted_freq.begin(), sorted_freq.end(),
90 [](const std::pair<Key, double>& left,
91 const std::pair<Key, double>& right) {
92 return left.second > right.second;
93 });
94
95 for (const auto& pair : sorted_freq) {
96 if (pair.second < threshold)
97 break;
98 out << label_from_key(pair.first) << " " << pair.second * 100.0
99 << "%" << std::endl;
100 }
101 }
102
103 // Writes |hist| to |out| sorted by count in the following format:
104 // LABEL3 100
105 // LABEL1 50
106 // LABEL2 10
107 // |label_from_key| is used to convert |Key| to label.
108 template <class Key>
WriteHist(std::ostream & out,const std::unordered_map<Key,uint32_t> & hist,std::string (* label_from_key)(Key))109 void WriteHist(std::ostream& out, const std::unordered_map<Key, uint32_t>& hist,
110 std::string (*label_from_key)(Key)) {
111 std::vector<std::pair<Key, uint32_t>> sorted_hist(hist.begin(), hist.end());
112 std::sort(sorted_hist.begin(), sorted_hist.end(),
113 [](const std::pair<Key, uint32_t>& left,
114 const std::pair<Key, uint32_t>& right) {
115 return left.second > right.second;
116 });
117
118 for (const auto& pair : sorted_hist) {
119 out << label_from_key(pair.first) << " " << pair.second << std::endl;
120 }
121 }
122
123 } // namespace
124
StatsAnalyzer(const SpirvStats & stats)125 StatsAnalyzer::StatsAnalyzer(const SpirvStats& stats) : stats_(stats) {
126 num_modules_ = 0;
127 for (const auto& pair : stats_.version_hist) {
128 num_modules_ += pair.second;
129 }
130
131 version_freq_ = GetRecall(stats_.version_hist, num_modules_);
132 generator_freq_ = GetRecall(stats_.generator_hist, num_modules_);
133 capability_freq_ = GetRecall(stats_.capability_hist, num_modules_);
134 extension_freq_ = GetRecall(stats_.extension_hist, num_modules_);
135 opcode_freq_ = GetPrevalence(stats_.opcode_hist);
136 }
137
WriteVersion(std::ostream & out)138 void StatsAnalyzer::WriteVersion(std::ostream& out) {
139 WriteFreq(out, version_freq_, GetVersionString);
140 }
141
WriteGenerator(std::ostream & out)142 void StatsAnalyzer::WriteGenerator(std::ostream& out) {
143 WriteFreq(out, generator_freq_, GetGeneratorString);
144 }
145
WriteCapability(std::ostream & out)146 void StatsAnalyzer::WriteCapability(std::ostream& out) {
147 WriteFreq(out, capability_freq_, GetCapabilityString);
148 }
149
WriteExtension(std::ostream & out)150 void StatsAnalyzer::WriteExtension(std::ostream& out) {
151 WriteFreq(out, extension_freq_, KeyIsLabel);
152 }
153
WriteOpcode(std::ostream & out)154 void StatsAnalyzer::WriteOpcode(std::ostream& out) {
155 out << "Total unique opcodes used: " << opcode_freq_.size() << std::endl;
156 WriteFreq(out, opcode_freq_, GetOpcodeString);
157 }
158
WriteConstantLiterals(std::ostream & out)159 void StatsAnalyzer::WriteConstantLiterals(std::ostream& out) {
160 out << "Constant literals" << std::endl;
161
162 out << "Float 32" << std::endl;
163 WriteFreq(out, GetPrevalence(stats_.f32_constant_hist), KeyIsLabel);
164
165 out << std::endl << "Float 64" << std::endl;
166 WriteFreq(out, GetPrevalence(stats_.f64_constant_hist), KeyIsLabel);
167
168 out << std::endl << "Unsigned int 16" << std::endl;
169 WriteFreq(out, GetPrevalence(stats_.u16_constant_hist), KeyIsLabel);
170
171 out << std::endl << "Signed int 16" << std::endl;
172 WriteFreq(out, GetPrevalence(stats_.s16_constant_hist), KeyIsLabel);
173
174 out << std::endl << "Unsigned int 32" << std::endl;
175 WriteFreq(out, GetPrevalence(stats_.u32_constant_hist), KeyIsLabel);
176
177 out << std::endl << "Signed int 32" << std::endl;
178 WriteFreq(out, GetPrevalence(stats_.s32_constant_hist), KeyIsLabel);
179
180 out << std::endl << "Unsigned int 64" << std::endl;
181 WriteFreq(out, GetPrevalence(stats_.u64_constant_hist), KeyIsLabel);
182
183 out << std::endl << "Signed int 64" << std::endl;
184 WriteFreq(out, GetPrevalence(stats_.s64_constant_hist), KeyIsLabel);
185 }
186
WriteOpcodeMarkov(std::ostream & out)187 void StatsAnalyzer::WriteOpcodeMarkov(std::ostream& out) {
188 if (stats_.opcode_markov_hist.empty())
189 return;
190
191 const std::unordered_map<uint32_t, std::unordered_map<uint32_t, uint32_t>>&
192 cue_to_hist = stats_.opcode_markov_hist[0];
193
194 // Sort by prevalence of the opcodes in opcode_freq_ (descending).
195 std::vector<std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>>
196 sorted_cue_to_hist(cue_to_hist.begin(), cue_to_hist.end());
197 std::sort(sorted_cue_to_hist.begin(), sorted_cue_to_hist.end(),
198 [this](
199 const std::pair<uint32_t,
200 std::unordered_map<uint32_t, uint32_t>>& left,
201 const std::pair<uint32_t,
202 std::unordered_map<uint32_t, uint32_t>>& right) {
203 const double lf = opcode_freq_[left.first];
204 const double rf = opcode_freq_[right.first];
205 if (lf == rf)
206 return right.first > left.first;
207 return lf > rf;
208 });
209
210 for (const auto& kv : sorted_cue_to_hist) {
211 const uint32_t cue = kv.first;
212 const double kFrequentEnoughToAnalyze = 0.0001;
213 if (opcode_freq_[cue] < kFrequentEnoughToAnalyze) continue;
214
215 const std::unordered_map<uint32_t, uint32_t>& hist = kv.second;
216
217 uint32_t total = 0;
218 for (const auto& pair : hist) {
219 total += pair.second;
220 }
221
222 std::vector<std::pair<uint32_t, uint32_t>>
223 sorted_hist(hist.begin(), hist.end());
224 std::sort(sorted_hist.begin(), sorted_hist.end(),
225 [](const std::pair<uint32_t, uint32_t>& left,
226 const std::pair<uint32_t, uint32_t>& right) {
227 if (left.second == right.second)
228 return right.first > left.first;
229 return left.second > right.second;
230 });
231
232 for (const auto& pair : sorted_hist) {
233 const double prior = opcode_freq_[pair.first];
234 const double posterior =
235 static_cast<double>(pair.second) / static_cast<double>(total);
236 out << GetOpcodeString(cue) << " -> " << GetOpcodeString(pair.first)
237 << " " << posterior * 100 << "% (base rate " << prior * 100
238 << "%, pair occurrences " << pair.second << ")" << std::endl;
239 }
240 }
241 }
242