• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 The Tint Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "fuzzers/tint_regex_fuzzer/wgsl_mutator.h"
16 
17 #include <cassert>
18 #include <cstring>
19 #include <map>
20 #include <regex>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 
25 #include "fuzzers/random_generator.h"
26 
27 namespace tint {
28 namespace fuzzers {
29 namespace regex_fuzzer {
30 
// Returns the offset of every occurrence of `delimiter` in `wgsl_code`, in
// increasing order. Each search resumes one character after the previous hit,
// so overlapping occurrences of a multi-character delimiter are all reported.
std::vector<size_t> FindDelimiterIndices(const std::string& delimiter,
                                         const std::string& wgsl_code) {
  std::vector<size_t> positions;

  size_t hit = wgsl_code.find(delimiter);
  while (hit != std::string::npos) {
    positions.push_back(hit);
    hit = wgsl_code.find(delimiter, hit + 1);
  }

  return positions;
}
41 
// Returns a (start offset, length) pair for each identifier-like token found
// in `wgsl_code`, in order of appearance.
std::vector<std::pair<size_t, size_t>> GetIdentifiers(
    const std::string& wgsl_code) {
  // The pattern is: one character that is not a letter, then a WGSL
  // identifier (captured), then one character that cannot be part of a WGSL
  // identifier. The identifier grammar is taken from:
  // https://www.w3.org/TR/WGSL/#identifiers.
  // Because both surrounding characters are consumed by the match, an
  // identifier at the very start or very end of the string, or one whose
  // leading delimiter was consumed by the previous match, is not reported.
  const std::regex identifier_pattern(
      "[^a-zA-Z]([a-zA-Z][0-9a-zA-Z_]*)[^0-9a-zA-Z_]");

  std::vector<std::pair<size_t, size_t>> found;
  std::smatch hit;
  auto cursor = wgsl_code.cbegin();

  while (std::regex_search(cursor, wgsl_code.cend(), hit, identifier_pattern)) {
    // Sub-match 1 is the identifier itself, without the delimiters around it.
    const auto& identifier = hit[1];
    found.emplace_back(
        static_cast<size_t>(identifier.first - wgsl_code.cbegin()),
        static_cast<size_t>(identifier.length()));
    // Resume right after the whole match (including the trailing delimiter).
    cursor = hit[0].second;
  }
  return found;
}
68 
// Returns a (start offset, length) pair for each integer literal found in `s`,
// in the order in which the search loop discovers them.
std::vector<std::pair<size_t, size_t>> GetIntLiterals(const std::string& s) {
  // Looks for integer literals in decimal or hexadecimal form.
  // Regex obtained here: https://www.w3.org/TR/WGSL/#literals
  //
  // The spaces around `|` are literal characters of the pattern, so a
  // hexadecimal literal must be followed by a space, `0` must be surrounded by
  // spaces, and a decimal literal must be preceded by a space. Each
  // alternative wraps the literal proper in a capture group so that the
  // reported offset and length cover exactly the literal and never the
  // surrounding spaces.
  const std::regex int_literal_regex(
      "(-?0x[0-9a-fA-F]+) | (0) | (-?[1-9][0-9]*)");
  const std::regex uint_literal_regex(
      "(0x[0-9a-fA-F]+u) | (0u) | ([1-9][0-9]*u)");

  std::vector<std::pair<size_t, size_t>> result;
  std::smatch match;
  auto search_start = s.cbegin();

  // The unsigned pattern is only consulted when the signed pattern finds
  // nothing in the remaining text.
  while (std::regex_search(search_start, s.cend(), match, int_literal_regex) ||
         std::regex_search(search_start, s.cend(), match,
                           uint_literal_regex)) {
    // Exactly one of the capture groups took part in the match; it delimits
    // the literal.
    for (size_t group = 1; group < match.size(); ++group) {
      if (match[group].matched) {
        result.emplace_back(
            static_cast<size_t>(match[group].first - s.cbegin()),
            static_cast<size_t>(match[group].length()));
        break;
      }
    }
    // Continue after the whole match, including any trailing space.
    search_start = match[0].second;
  }
  return result;
}
91 
// Given the offset of an opening brace in `wgsl_code`, returns the offset of
// the `}` that balances it, taking nested `{`/`}` pairs into account.
// Returns 0 when no balancing brace exists, including when
// `opening_bracket_pos` is the last character of the string or lies beyond it.
// The character at `opening_bracket_pos` itself is not inspected.
size_t FindClosingBrace(size_t opening_bracket_pos,
                        const std::string& wgsl_code) {
  // Number of braces opened but not yet closed; the one at
  // `opening_bracket_pos` counts as already open.
  size_t depth = 1;
  for (size_t pos = opening_bracket_pos + 1; pos < wgsl_code.size(); ++pos) {
    if (wgsl_code[pos] == '{') {
      ++depth;
    } else if (wgsl_code[pos] == '}') {
      --depth;
      if (depth == 0) {
        return pos;
      }
    }
  }
  // Ran off the end of the code without balancing the opening brace.
  return 0;
}
106 
// Returns the offsets of the opening braces of function bodies whose
// declaration contains a `->`, i.e. functions that declare a return type.
// The detection is purely textual: it matches "fn", then the nearest "->",
// then the nearest "{".
std::vector<size_t> GetFunctionBodyPositions(const std::string& wgsl_code) {
  // Finds all the functions with a non-void return value.
  const std::regex function_regex("fn.*?->.*?\\{");
  std::vector<size_t> result;

  for (std::sregex_iterator it(wgsl_code.cbegin(), wgsl_code.cend(),
                               function_regex),
       end;
       it != end; ++it) {
    // The match ends just past the `{`, so the brace is one character before
    // the end of the match.
    result.push_back(
        static_cast<size_t>((*it)[0].second - wgsl_code.cbegin()) - 1);
  }
  return result;
}
124 
InsertReturnStatement(std::string & wgsl_code,RandomGenerator & generator)125 bool InsertReturnStatement(std::string& wgsl_code, RandomGenerator& generator) {
126   std::vector<size_t> function_body_positions =
127       GetFunctionBodyPositions(wgsl_code);
128 
129   // No function was found in wgsl_code.
130   if (function_body_positions.empty()) {
131     return false;
132   }
133 
134   // Pick a random function's opening bracket, find the corresponding closing
135   // bracket, and find a semi-colon within the function body.
136   size_t left_bracket_pos = generator.GetRandomElement(function_body_positions);
137 
138   size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code);
139 
140   if (right_bracket_pos == 0) {
141     return false;
142   }
143 
144   std::vector<size_t> semicolon_positions;
145   for (size_t pos = wgsl_code.find(";", left_bracket_pos + 1);
146        pos < right_bracket_pos; pos = wgsl_code.find(";", pos + 1)) {
147     semicolon_positions.push_back(pos);
148   }
149 
150   if (semicolon_positions.empty()) {
151     return false;
152   }
153 
154   size_t semicolon_position = generator.GetRandomElement(semicolon_positions);
155 
156   // Get all identifiers and integer literals to use as potential return values.
157   std::vector<std::pair<size_t, size_t>> identifiers =
158       GetIdentifiers(wgsl_code);
159   auto return_values = identifiers;
160   std::vector<std::pair<size_t, size_t>> int_literals =
161       GetIntLiterals(wgsl_code);
162   return_values.insert(return_values.end(), int_literals.begin(),
163                        int_literals.end());
164   std::pair<size_t, size_t> return_value =
165       generator.GetRandomElement(return_values);
166   std::string return_statement =
167       "return " + wgsl_code.substr(return_value.first, return_value.second) +
168       ";";
169 
170   // Insert the return statement immediately after the semicolon.
171   wgsl_code.insert(semicolon_position + 1, return_statement);
172   return true;
173 }
174 
// Exchanges two regions of `wgsl_code` in place. Each region starts one
// character after its index (`idx1` / `idx2`) and spans `regN_len - 1`
// characters.
void SwapIntervals(size_t idx1,
                   size_t reg1_len,
                   size_t idx2,
                   size_t reg2_len,
                   std::string& wgsl_code) {
  const size_t first_begin = idx1 + 1;
  const size_t second_begin = idx2 + 1;

  const std::string first_text = wgsl_code.substr(first_begin, reg1_len - 1);
  const std::string second_text = wgsl_code.substr(second_begin, reg2_len - 1);

  // Rewrite the second region first: provided the first region lies before
  // it, this leaves the first region's offset untouched.
  wgsl_code.replace(second_begin, second_text.size(), first_text);
  wgsl_code.replace(first_begin, first_text.size(), second_text);
}
189 
// Removes `reg_len - 1` characters from `wgsl_code`, starting one character
// after `idx1`, so the character at `idx1` itself is kept.
void DeleteInterval(size_t idx1, size_t reg_len, std::string& wgsl_code) {
  const size_t first_removed = idx1 + 1;
  const size_t removed_count = reg_len - 1;
  wgsl_code.erase(first_removed, removed_count);
}
193 
// Copies the `reg1_len - 1` characters that follow `idx1` in `wgsl_code` and
// inserts that copy one character after `idx2`.
void DuplicateInterval(size_t idx1,
                       size_t reg1_len,
                       size_t idx2,
                       std::string& wgsl_code) {
  const size_t insertion_point = idx2 + 1;
  // The copy is taken before the insertion modifies the string.
  const std::string copied_text = wgsl_code.substr(idx1 + 1, reg1_len - 1);
  wgsl_code.insert(insertion_point, copied_text);
}
201 
// Overwrites the region of `wgsl_code` starting at `idx2` (up to `id2_len`
// characters) with a copy of the region starting at `idx1` (up to `id1_len`
// characters). The region at `idx1` is only read by this function.
void ReplaceRegion(size_t idx1,
                   size_t id1_len,
                   size_t idx2,
                   size_t id2_len,
                   std::string& wgsl_code) {
  // Copy the source text first; the replacement may move it.
  const std::string source_text = wgsl_code.substr(idx1, id1_len);
  // `replace` clamps the count to the end of the string, exactly as taking
  // the size of `substr(idx2, id2_len)` would.
  wgsl_code.replace(idx2, id2_len, source_text);
}
211 
// Replaces the `length` characters of `wgsl_code` starting at `start_index`
// with `replacement_text`. The replacement may be shorter or longer than the
// region it replaces.
void ReplaceInterval(size_t start_index,
                     size_t length,
                     std::string replacement_text,
                     std::string& wgsl_code) {
  wgsl_code.replace(start_index, length, replacement_text);
}
219 
SwapRandomIntervals(const std::string & delimiter,std::string & wgsl_code,RandomGenerator & generator)220 bool SwapRandomIntervals(const std::string& delimiter,
221                          std::string& wgsl_code,
222                          RandomGenerator& generator) {
223   std::vector<size_t> delimiter_positions =
224       FindDelimiterIndices(delimiter, wgsl_code);
225 
226   // Need to have at least 3 indices.
227   if (delimiter_positions.size() < 3) {
228     return false;
229   }
230 
231   // Choose indices:
232   //   interval_1_start < interval_1_end <= interval_2_start < interval_2_end
233   uint32_t interval_1_start = generator.GetUInt32(
234       static_cast<uint32_t>(delimiter_positions.size()) - 2u);
235   uint32_t interval_1_end = generator.GetUInt32(
236       interval_1_start + 1u,
237       static_cast<uint32_t>(delimiter_positions.size()) - 1u);
238   uint32_t interval_2_start = generator.GetUInt32(
239       interval_1_end, static_cast<uint32_t>(delimiter_positions.size()) - 1u);
240   uint32_t interval_2_end = generator.GetUInt32(
241       interval_2_start + 1u, static_cast<uint32_t>(delimiter_positions.size()));
242 
243   SwapIntervals(delimiter_positions[interval_1_start],
244                 delimiter_positions[interval_1_end] -
245                     delimiter_positions[interval_1_start],
246                 delimiter_positions[interval_2_start],
247                 delimiter_positions[interval_2_end] -
248                     delimiter_positions[interval_2_start],
249                 wgsl_code);
250 
251   return true;
252 }
253 
DeleteRandomInterval(const std::string & delimiter,std::string & wgsl_code,RandomGenerator & generator)254 bool DeleteRandomInterval(const std::string& delimiter,
255                           std::string& wgsl_code,
256                           RandomGenerator& generator) {
257   std::vector<size_t> delimiter_positions =
258       FindDelimiterIndices(delimiter, wgsl_code);
259 
260   // Need to have at least 2 indices.
261   if (delimiter_positions.size() < 2) {
262     return false;
263   }
264 
265   uint32_t interval_start = generator.GetUInt32(
266       static_cast<uint32_t>(delimiter_positions.size()) - 1u);
267   uint32_t interval_end = generator.GetUInt32(
268       interval_start + 1u, static_cast<uint32_t>(delimiter_positions.size()));
269 
270   DeleteInterval(
271       delimiter_positions[interval_start],
272       delimiter_positions[interval_end] - delimiter_positions[interval_start],
273       wgsl_code);
274 
275   return true;
276 }
277 
DuplicateRandomInterval(const std::string & delimiter,std::string & wgsl_code,RandomGenerator & generator)278 bool DuplicateRandomInterval(const std::string& delimiter,
279                              std::string& wgsl_code,
280                              RandomGenerator& generator) {
281   std::vector<size_t> delimiter_positions =
282       FindDelimiterIndices(delimiter, wgsl_code);
283 
284   // Need to have at least 2 indices
285   if (delimiter_positions.size() < 2) {
286     return false;
287   }
288 
289   uint32_t interval_start = generator.GetUInt32(
290       static_cast<uint32_t>(delimiter_positions.size()) - 1u);
291   uint32_t interval_end = generator.GetUInt32(
292       interval_start + 1u, static_cast<uint32_t>(delimiter_positions.size()));
293   uint32_t duplication_point =
294       generator.GetUInt32(static_cast<uint32_t>(delimiter_positions.size()));
295 
296   DuplicateInterval(
297       delimiter_positions[interval_start],
298       delimiter_positions[interval_end] - delimiter_positions[interval_start],
299       delimiter_positions[duplication_point], wgsl_code);
300 
301   return true;
302 }
303 
ReplaceRandomIdentifier(std::string & wgsl_code,RandomGenerator & generator)304 bool ReplaceRandomIdentifier(std::string& wgsl_code,
305                              RandomGenerator& generator) {
306   std::vector<std::pair<size_t, size_t>> identifiers =
307       GetIdentifiers(wgsl_code);
308 
309   // Need at least 2 identifiers
310   if (identifiers.size() < 2) {
311     return false;
312   }
313 
314   uint32_t id1_index =
315       generator.GetUInt32(static_cast<uint32_t>(identifiers.size()));
316   uint32_t id2_index =
317       generator.GetUInt32(static_cast<uint32_t>(identifiers.size()));
318 
319   // The two identifiers must be different
320   while (id1_index == id2_index) {
321     id2_index = generator.GetUInt32(static_cast<uint32_t>(identifiers.size()));
322   }
323 
324   ReplaceRegion(identifiers[id1_index].first, identifiers[id1_index].second,
325                 identifiers[id2_index].first, identifiers[id2_index].second,
326                 wgsl_code);
327 
328   return true;
329 }
330 
ReplaceRandomIntLiteral(std::string & wgsl_code,RandomGenerator & generator)331 bool ReplaceRandomIntLiteral(std::string& wgsl_code,
332                              RandomGenerator& generator) {
333   std::vector<std::pair<size_t, size_t>> literals = GetIntLiterals(wgsl_code);
334 
335   // Need at least one integer literal
336   if (literals.size() < 1) {
337     return false;
338   }
339 
340   uint32_t literal_index =
341       generator.GetUInt32(static_cast<uint32_t>(literals.size()));
342 
343   // INT_MAX = 2147483647, INT_MIN = -2147483648
344   std::vector<std::string> boundary_values = {
345       "2147483647", "-2147483648", "1", "-1", "0", "4294967295"};
346 
347   uint32_t boundary_index =
348       generator.GetUInt32(static_cast<uint32_t>(boundary_values.size()));
349 
350   ReplaceInterval(literals[literal_index].first, literals[literal_index].second,
351                   boundary_values[boundary_index], wgsl_code);
352 
353   return true;
354 }
355 
356 }  // namespace regex_fuzzer
357 }  // namespace fuzzers
358 }  // namespace tint
359