// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

#include "pw_tokenizer/detokenize.h"

#include <array>
#include <cstdint>
#include <string>
#include <string_view>
#include <utility>

#include "pw_stream/memory_stream.h"
#include "pw_tokenizer/example_binary_with_tokenized_strings.h"
#include "pw_unit_test/framework.h"

namespace pw::tokenizer {
namespace {

using namespace std::literals::string_view_literals;

// Use a shorter name for the error string macro.
#define ERR PW_TOKENIZER_ARG_DECODING_ERROR

using Case = std::pair<std::string_view, std::string_view>;

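// Expands a list of test cases into a std::array so tests can iterate over
// (input, expected output) pairs with a range-based for loop.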
template <typename... Args>
auto TestCases(Args... args) {
  return std::array<Case, sizeof...(Args)>{args...};
}

// Database with the following entries and arbitrary token values:
// {
//   0x00000001: "One",
//   0x00000005: "TWO",
//   0x000000ff: "333",
//   0xDDEEEEFF: "FOUR",
//   0xEEEEEEEE: "$AQAAAA==",  # Nested Base64 token for "One"
//   0xF897A79D: "■msg♦This is $AQAAAA== message■module♦■file♦file.txt",
// }
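// The blob below follows the binary token database layout: an 8-byte
// "TOKENS\0\0" header, a little-endian u32 token count, four reserved bytes,
// then one 8-byte entry per token (a little-endian u32 token plus a 4-byte
// removal date, filled with arbitrary "----" bytes here), and finally the
// null-terminated strings in entry order.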
constexpr char kTestDatabase[] =
    "TOKENS\0\0"
    "\x06\x00\x00\x00"  // Number of tokens in this database.
    "\0\0\0\0"
    "\x01\x00\x00\x00----"
    "\x05\x00\x00\x00----"
    "\xFF\x00\x00\x00----"
    "\xFF\xEE\xEE\xDD----"
    "\xEE\xEE\xEE\xEE----"
    "\x9D\xA7\x97\xF8----"
    "One\0"
    "TWO\0"
    "333\0"
    "FOUR\0"
    "$AQAAAA==\0"
    "■msg♦This is $AQAAAA== message■module♦■file♦file.txt";

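// CSV databases have four columns per row: the token, the date the entry was
// removed (blank if it is still present), the domain, and the tokenized
// string. Whitespace in the domain column is ignored.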
constexpr const char kCsvDefaultDomain[] =
    "1,2001-01-01,,Hello World!\n"
    "2,,,\n"
    "3,,, Goodbye!\n";

constexpr const char kCsvDifferentDomains[] =
    "1,          , d o m a i n 1,Hello\n"
    "2,          , dom  ain2,\n"
    "3,          ,\t\t\tdomain   3,World!\n";

constexpr const char kCsvBadDates[] =
    "1,01-01-2001, D1, Hello\n"
    "2,          , D2, \n"
    "3,          , D3, Goodbye!\n";

constexpr const char kCsvBadToken[] =
    ",2001-01-01, D1, Hello\n"
    "2,          , D2, \n"
    "3,          , D3, Goodbye!\n";

constexpr const char kCsvBadFormat[] =
    "1,2001-01-01, D1, Hello\n"
    "2,, \n"
    "3,          , D3, Goodbye!\n";

class Detokenize : public ::testing::Test {
 protected:
  Detokenize() : detok_(TokenDatabase::Create<kTestDatabase>()) {}
  Detokenizer detok_;
};

TEST_F(Detokenize, NoFormatting) {
  EXPECT_EQ(detok_.Detokenize("\1\0\0\0"sv).BestString(), "One");
  EXPECT_EQ(detok_.Detokenize("\5\0\0\0"sv).BestString(), "TWO");
  EXPECT_EQ(detok_.Detokenize("\xff\x00\x00\x00"sv).BestString(), "333");
  EXPECT_EQ(detok_.Detokenize("\xff\xee\xee\xdd"sv).BestString(), "FOUR");
}

TEST_F(Detokenize, FromElfSection) {
  // Create a detokenizer from an ELF file with only the pw_tokenizer sections.
  // See py/detokenize_test.py.
  // Offset and size of the .pw_tokenizer.entries section in bytes.
  constexpr uint32_t database_offset_ = 0x00000174;
  constexpr size_t database_size_ = 0x000004C2;

  pw::span<const uint8_t> token_entries(
      reinterpret_cast<const uint8_t*>(::test::ns::kElfSection.data() +
                                       database_offset_),
      database_size_);
  pw::Result<Detokenizer> detok_from_elf_ =
      Detokenizer::FromElfSection(token_entries);
  ASSERT_TRUE(detok_from_elf_.ok());
  EXPECT_EQ(detok_from_elf_->Detokenize("\xd6\x8c\x66\x2e").BestString(),
            "Jello, world!");
}

TEST_F(Detokenize, FromElfSectionCountDomain) {
  // Create a detokenizer from an ELF file with only the pw_tokenizer sections.
  // See py/detokenize_test.py.
  // Offset and size of the .pw_tokenizer.entries section in bytes.
  constexpr uint32_t database_offset_ = 0x00000174;
  constexpr size_t database_size_ = 0x000004C2;

  pw::span<const uint8_t> token_entries(
      reinterpret_cast<const uint8_t*>(::test::ns::kElfSection.data() +
                                       database_offset_),
      database_size_);
  pw::Result<Detokenizer> detok_from_elf_ =
      Detokenizer::FromElfSection(token_entries);
  ASSERT_TRUE(detok_from_elf_.ok());

  // Two domains exist in the ELF file.
  // The token 881436a0="The answer is: %s" is in two domains.
  EXPECT_EQ(detok_from_elf_->database().size(), 2u);
}

TEST_F(Detokenize, FromElfFile) {
  // Create a detokenizer from an ELF file with only the pw_tokenizer sections.
  // See py/detokenize_test.py.
  stream::MemoryReader stream(::test::ns::kElfSection);

  pw::Result<Detokenizer> detok = Detokenizer::FromElfFile(stream);
  PW_TEST_ASSERT_OK(detok);
  EXPECT_EQ(detok->Detokenize("\xd6\x8c\x66\x2e").BestString(),
            "Jello, world!");
}

TEST_F(Detokenize, FromCsvFile_DefaultDomain) {
  pw::Result<Detokenizer> detok_csv = Detokenizer::FromCsv(kCsvDefaultDomain);
  PW_TEST_ASSERT_OK(detok_csv);
  EXPECT_EQ(detok_csv->Detokenize("\1\0\0\0"sv).BestString(), "Hello World!");
}

TEST_F(Detokenize, FromCsvFile_DifferentDomains_IgnoreWhitespace) {
  pw::Result<Detokenizer> detok_csv =
      Detokenizer::FromCsv(kCsvDifferentDomains);
  PW_TEST_ASSERT_OK(detok_csv);
  auto it = detok_csv->database().begin();
  EXPECT_EQ(it->first, "domain3");
  it++;
  EXPECT_EQ(it->first, "domain2");
  it++;
  EXPECT_EQ(it->first, "domain1");
}

TEST_F(Detokenize, FromCsvFile_CountDomains) {
  pw::Result<Detokenizer> detok_csv1 = Detokenizer::FromCsv(kCsvDefaultDomain);
  pw::Result<Detokenizer> detok_csv2 =
      Detokenizer::FromCsv(kCsvDifferentDomains);
  PW_TEST_ASSERT_OK(detok_csv1);
  PW_TEST_ASSERT_OK(detok_csv2);
  EXPECT_EQ(detok_csv1->database().size(), 1u);
  EXPECT_EQ(detok_csv2->database().size(), 3u);
}

TEST_F(Detokenize, FromCsvFile_BadCsv_Date) {
  pw::Result<Detokenizer> detok_csv = Detokenizer::FromCsv(kCsvBadDates);
  EXPECT_FALSE(detok_csv.ok());
}

TEST_F(Detokenize, FromCsvFile_BadCsv_Token) {
  pw::Result<Detokenizer> detok_csv = Detokenizer::FromCsv(kCsvBadToken);
  EXPECT_FALSE(detok_csv.ok());
}

TEST_F(Detokenize, FromCsvFile_BadCsv_Format) {
  pw::Result<Detokenizer> detok_csv = Detokenizer::FromCsv(kCsvBadFormat);
  // Gives a warning but continues as expected:
  // WRN  Skipped 1 of 3 lines because they did not have 4 columns as expected.
  EXPECT_TRUE(detok_csv.ok());
}

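// BestString() returns the most likely decoding, or an empty string when
// nothing matched; BestStringWithErrors() substitutes an error message
// instead (see the tests further below).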
TEST_F(Detokenize, BestString_MissingToken_IsEmpty) {
  EXPECT_FALSE(detok_.Detokenize("").ok());
  EXPECT_TRUE(detok_.Detokenize("", 0u).BestString().empty());
}

TEST_F(Detokenize, BestString_ShorterToken_ZeroExtended) {
  EXPECT_EQ(detok_.Detokenize("\x42", 1u).token(), 0x42u);
  EXPECT_EQ(detok_.Detokenize("\1\0"sv).token(), 0x1u);
  EXPECT_EQ(detok_.Detokenize("\1\0\3"sv).token(), 0x030001u);
  EXPECT_EQ(detok_.Detokenize("\0\0\0"sv).token(), 0x0u);
}

TEST_F(Detokenize, BestString_UnknownToken_IsEmpty) {
  EXPECT_FALSE(detok_.Detokenize("\0\0\0\0"sv).ok());
  EXPECT_TRUE(detok_.Detokenize("\0\0\0\0"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\2\0\0\0"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\x10\x32\x54\x76\x99"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\x98\xba\xdc\xfe"sv).BestString().empty());
}

TEST_F(Detokenize, BestStringWithErrors_MissingToken_ErrorMessage) {
  EXPECT_FALSE(detok_.Detokenize("").ok());
  EXPECT_EQ(detok_.Detokenize("", 0u).BestStringWithErrors(),
            ERR("missing token"));
}

TEST_F(Detokenize, BestStringWithErrors_ShorterTokenMatchesStrings) {
  EXPECT_EQ(detok_.Detokenize("\1", 1u).BestStringWithErrors(), "One");
  EXPECT_EQ(detok_.Detokenize("\1\0"sv).BestStringWithErrors(), "One");
  EXPECT_EQ(detok_.Detokenize("\1\0\0"sv).BestStringWithErrors(), "One");
}

TEST_F(Detokenize, BestStringWithErrors_UnknownToken_ErrorMessage) {
  ASSERT_FALSE(detok_.Detokenize("\0\0\0\0"sv).ok());
  EXPECT_EQ(detok_.Detokenize("\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000000"));
  EXPECT_EQ(detok_.Detokenize("\0\0\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000000"));
  EXPECT_EQ(detok_.Detokenize("\0\0\0\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000000"));
  EXPECT_EQ(detok_.Detokenize("\2\0\0\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000002"));
  EXPECT_EQ(detok_.Detokenize("\x10\x32\x54\x76\x99"sv).BestStringWithErrors(),
            ERR("unknown token 76543210"));
  EXPECT_EQ(detok_.Detokenize("\x98\xba\xdc\xfe"sv).BestStringWithErrors(),
            ERR("unknown token fedcba98"));
}

// Base64 versions of the tokens
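// Each macro is a '$' followed by the Base64-encoded little-endian token
// bytes; e.g. "$AQAAAA==" is Base64 for "\x01\x00\x00\x00", token 0x00000001.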
#define ONE "$AQAAAA=="
#define TWO "$BQAAAA=="
#define THREE "$/wAAAA=="
#define FOUR "$/+7u3Q=="
#define NEST_ONE "$7u7u7g=="

TEST_F(Detokenize, Base64_NoArguments) {
  for (auto [data, expected] : TestCases(
           Case{ONE, "One"},
           Case{TWO, "TWO"},
           Case{THREE, "333"},
           Case{FOUR, "FOUR"},
           Case{FOUR ONE ONE, "FOUROneOne"},
           Case{ONE TWO THREE FOUR, "OneTWO333FOUR"},
           Case{ONE "\r\n" TWO "\r\n" THREE "\r\n" FOUR "\r\n",
                "One\r\nTWO\r\n333\r\nFOUR\r\n"},
           Case{"123" FOUR, "123FOUR"},
           Case{"123" FOUR ", 56", "123FOUR, 56"},
           Case{"12" THREE FOUR ", 56", "12333FOUR, 56"},
           Case{"$0" ONE, "$0One"},
           Case{"$/+7u3Q=", "$/+7u3Q="},  // incomplete message (missing "=")
           Case{"$123456==" FOUR, "$123456==FOUR"},
           Case{NEST_ONE, "One"},
           Case{NEST_ONE NEST_ONE NEST_ONE, "OneOneOne"},
           Case{FOUR "$" ONE NEST_ONE "?", "FOUR$OneOne?"})) {
    EXPECT_EQ(detok_.DetokenizeText(data), expected);
  }
}

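// DecodeOptionallyTokenizedData accepts raw binary token data, Base64-encoded
// messages, and plain text. These cases mirror the Base64 cases above, plus a
// raw binary token ("\1\0\0\0") and a nested tokenized message.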
TEST_F(Detokenize, OptionallyTokenizedData) {
  for (auto [data, expected] : TestCases(
           Case{ONE, "One"},
           Case{"\1\0\0\0", "One"},
           Case{TWO, "TWO"},
           Case{THREE, "333"},
           Case{FOUR, "FOUR"},
           Case{FOUR ONE ONE, "FOUROneOne"},
           Case{ONE TWO THREE FOUR, "OneTWO333FOUR"},
           Case{ONE "\r\n" TWO "\r\n" THREE "\r\n" FOUR "\r\n",
                "One\r\nTWO\r\n333\r\nFOUR\r\n"},
           Case{"123" FOUR, "123FOUR"},
           Case{"123" FOUR ", 56", "123FOUR, 56"},
           Case{"12" THREE FOUR ", 56", "12333FOUR, 56"},
           Case{"$0" ONE, "$0One"},
           Case{"$/+7u3Q=", "$/+7u3Q="},  // incomplete message (missing "=")
           Case{"$123456==" FOUR, "$123456==FOUR"},
           Case{NEST_ONE, "One"},
           Case{NEST_ONE NEST_ONE NEST_ONE, "OneOneOne"},
           Case{FOUR "$" ONE NEST_ONE "?", "FOUR$OneOne?"},
           Case{"$naeX+A==",
                "■msg♦This is One message■module♦■file♦file.txt"})) {
    EXPECT_EQ(detok_.DecodeOptionallyTokenizedData(as_bytes(span(data))),
              std::string(expected));
  }
}

constexpr char kDataWithArguments[] =
    "TOKENS\0\0"
    "\x09\x00\x00\x00"  // Number of tokens in this database.
    "\0\0\0\0"
    "\x00\x00\x00\x00----"
    "\x0A\x0B\x0C\x0D----"
    "\x0E\x0F\x00\x01----"
    "\xAA\xAA\xAA\xAA----"
    "\xBB\xBB\xBB\xBB----"
    "\xCC\xCC\xCC\xCC----"
    "\xDD\xDD\xDD\xDD----"
    "\xEE\xEE\xEE\xEE----"
    "\xFF\xFF\xFF\xFF----"
    "\0"  // Token 0x00000000 maps to the empty string.
    "Use the %s, %s.\0"
    "Now there are %d of %s!\0"
    "%c!\0"    // AA
    "%hhu!\0"  // BB
    "%hu!\0"   // CC
    "%u!\0"    // DD
    "%lu!\0"   // EE
    "%llu!";   // FF

constexpr TokenDatabase kWithArgs = TokenDatabase::Create<kDataWithArguments>();

class DetokenizeWithArgs : public ::testing::Test {
 protected:
  DetokenizeWithArgs() : detok_(kWithArgs) {}

  Detokenizer detok_;
};

TEST_F(DetokenizeWithArgs, NoMatches) {
  EXPECT_TRUE(detok_.Detokenize("\x23\xab\xc9\x87"sv).matches().empty());
}

TEST_F(DetokenizeWithArgs, SingleMatch) {
  EXPECT_EQ(detok_.Detokenize("\x00\x00\x00\x00"sv).matches().size(), 1u);
}

TEST_F(DetokenizeWithArgs, Empty) {
  EXPECT_EQ(detok_.Detokenize("\x00\x00\x00\x00"sv).BestString(), "");
}

TEST_F(DetokenizeWithArgs, Successful) {
  // Run through test cases, but don't include cases that use %hhu or %llu since
  // these are not currently supported in arm-none-eabi-gcc.
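  // Integer arguments are ZigZag-varint encoded (e.g. "\xfe\xff\x07" decodes
  // to 65535) and string arguments carry a length-prefix byte (e.g. "\5force"
  // is the 5-character string "force").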
  for (auto [data, expected] : TestCases(
           Case{"\x0A\x0B\x0C\x0D\5force\4Luke"sv, "Use the force, Luke."},
           Case{"\x0E\x0F\x00\x01\4\4them"sv, "Now there are 2 of them!"},
           Case{"\xAA\xAA\xAA\xAA\xfc\x01"sv, "~!"},
           Case{"\xCC\xCC\xCC\xCC\xfe\xff\x07"sv, "65535!"},
           Case{"\xDD\xDD\xDD\xDD\xfe\xff\x07"sv, "65535!"},
           Case{"\xDD\xDD\xDD\xDD\xfe\xff\xff\xff\x1f"sv, "4294967295!"},
           Case{"\xEE\xEE\xEE\xEE\xfe\xff\x07"sv, "65535!"},
           Case{"\xEE\xEE\xEE\xEE\xfe\xff\xff\xff\x1f"sv, "4294967295!"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

TEST_F(DetokenizeWithArgs, ExtraDataError) {
  auto error = detok_.Detokenize("\x00\x00\x00\x00MORE data"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ("", error.BestString());
}

TEST_F(DetokenizeWithArgs, MissingArgumentError) {
  auto error = detok_.Detokenize("\x0A\x0B\x0C\x0D\5force"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ(error.BestString(), "Use the force, %s.");
  EXPECT_EQ(error.BestStringWithErrors(),
            "Use the force, " ERR("%s MISSING") ".");
}

TEST_F(DetokenizeWithArgs, DecodingError) {
  auto error = detok_.Detokenize("\x0E\x0F\x00\x01\xFF"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ(error.BestString(), "Now there are %d of %s!");
  EXPECT_EQ(error.BestStringWithErrors(),
            "Now there are " ERR("%d ERROR") " of " ERR("%s SKIPPED") "!");
}

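// In this database, the four bytes after each token are the entry's removal
// date; 0xFFFFFFFF marks a string as still present, and any other value
// (entries 2 and 8 below) marks it as removed.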
constexpr char kDataWithCollisions[] =
    "TOKENS\0\0"
    "\x0F\x00\x00\x00"  // Number of tokens in this database.
    "\0\0\0\0"
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 1
    "\x00\x00\x00\x00\x01\x02\x03\x04"  // 2
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 3
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 4
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 5
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 6
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 7
    "\xAA\xAA\xAA\xAA\x00\x00\x00\x00"  // 8
    "\xAA\xAA\xAA\xAA\xff\xff\xff\xff"  // 9
    "\xBB\xBB\xBB\xBB\xff\xff\xff\xff"  // A
    "\xBB\xBB\xBB\xBB\xff\xff\xff\xff"  // B
    "\xCC\xCC\xCC\xCC\xff\xff\xff\xff"  // C
    "\xCC\xCC\xCC\xCC\xff\xff\xff\xff"  // D
    "\xDD\xDD\xDD\xDD\xff\xff\xff\xff"  // E
    "\xDD\xDD\xDD\xDD\xff\xff\xff\xff"  // F
    // String table
    "This string is present\0"   // 1
    "This string is removed\0"   // 2
    "One arg %d\0"               // 3
    "One arg %s\0"               // 4
    "Two args %s %u\0"           // 5
    "Two args %s %s %% %% %%\0"  // 6
    "Four args %d %d %d %d\0"    // 7
    "This one is removed\0"      // 8
    "This one is present\0"      // 9
    "Two ints %d %d\0"           // A
    "Three ints %d %d %d\0"      // B
    "Three strings %s %s %s\0"   // C
    "Two strings %s %s\0"        // D
    "Three %s %s %s\0"           // E
    "Five %d %d %d %d %s\0";     // F

constexpr TokenDatabase kWithCollisions =
    TokenDatabase::Create<kDataWithCollisions>();

class DetokenizeWithCollisions : public ::testing::Test {
 protected:
  DetokenizeWithCollisions() : detok_(kWithCollisions) {}

  Detokenizer detok_;
};

TEST_F(DetokenizeWithCollisions, Collision_AlwaysPreferSuccessfulDecode) {
  for (auto [data, expected] :
       TestCases(Case{"\0\0\0\0"sv, "This string is present"},
                 Case{"\0\0\0\0\x01"sv, "One arg -1"},
                 Case{"\0\0\0\0\x80"sv, "One arg [...]"},
                 Case{"\0\0\0\0\4Hey!\x04"sv, "Two args Hey! 2"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

TEST_F(DetokenizeWithCollisions, Collision_PreferDecodingAllBytes) {
  for (auto [data, expected] :
       TestCases(Case{"\0\0\0\0\x80\x80\x80\x80\x00"sv, "Two args [...] 0"},
                 Case{"\0\0\0\0\x08?"sv, "One arg %s"},
                 Case{"\0\0\0\0\x01!\x01\x80"sv, "Two args ! \x80 % % %"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

TEST_F(DetokenizeWithCollisions, Collision_PreferFewestDecodingErrors) {
  for (auto [data, expected] :
       TestCases(Case{"\xBB\xBB\xBB\xBB\x00"sv, "Two ints 0 %d"},
                 Case{"\xCC\xCC\xCC\xCC\2Yo\5?"sv, "Two strings Yo %s"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs) {
  auto result = detok_.Detokenize("\xDD\xDD\xDD\xDD\x01\x02\x01\x04\x05"sv);
  EXPECT_EQ((std::string_view)result.matches()[0].value(), "Five -1 1 -1 2 %s");
  EXPECT_EQ((std::string_view)result.matches()[1].value(), "Three \2 \4 %s"sv);
}

TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs_NoPercent) {
  // The "Two args %s %s ..." string successfully decodes this input, and has
  // more "arguments" due to its %% sequences, but %% doesn't count as a
  // decoded argument.
  EXPECT_EQ(detok_.Detokenize("\0\0\0\0\x01\x00\x01\x02"sv).BestString(),
            "Four args -1 0 -1 1");
}

TEST_F(DetokenizeWithCollisions, Collision_PreferStillPresentString) {
  for (auto [data, expected] :
       TestCases(Case{"\x00\x00\x00\x00"sv, "This string is present"},
                 Case{"\xAA\xAA\xAA\xAA"sv, "This one is present"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

TEST_F(DetokenizeWithCollisions, Collision_TracksAllMatches) {
  auto result = detok_.Detokenize("\0\0\0\0"sv);
  EXPECT_EQ(result.matches().size(), 7u);
}

}  // namespace
}  // namespace pw::tokenizer