// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

#include "pw_tokenizer/detokenize.h"

#include <array>
#include <string_view>
#include <utility>

#include "pw_tokenizer/example_binary_with_tokenized_strings.h"
#include "pw_unit_test/framework.h"

22 namespace pw::tokenizer {
23 namespace {
24 
25 using namespace std::literals::string_view_literals;
26 
27 // Use a shorter name for the error string macro.
28 #define ERR PW_TOKENIZER_ARG_DECODING_ERROR
29 
// A test case pairs raw input data with its expected detokenized output.
using Case = std::pair<std::string_view, std::string_view>;

// Packs a list of Cases into a std::array sized to the argument count.
template <typename... Args>
auto TestCases(Args... args) {
  return std::array<Case, sizeof...(Args)>{args...};
}

// Database with the following entries and arbitrary token values:
// {
//   0x00000001: "One",
//   0x00000005: "TWO",
//   0x000000ff: "333",
//   0xDDEEEEFF: "FOUR",
//   0xEEEEEEEE: "$AQAAAA==",  # Nested Base64 token for "One"
//   0xF897A79D: "■msg♦This is $AQAAAA== message■module♦■file♦file.txt",
// }
constexpr char kTestDatabase[] =
    "TOKENS\0\0"
    "\x06\x00\x00\x00"  // Number of tokens in this database.
    "\0\0\0\0"
    "\x01\x00\x00\x00----"
    "\x05\x00\x00\x00----"
    "\xFF\x00\x00\x00----"
    "\xFF\xEE\xEE\xDD----"
    "\xEE\xEE\xEE\xEE----"
    "\x9D\xA7\x97\xF8----"
    "One\0"
    "TWO\0"
    "333\0"
    "FOUR\0"
    "$AQAAAA==\0"
    "■msg♦This is $AQAAAA== message■module♦■file♦file.txt";

62 class Detokenize : public ::testing::Test {
63  protected:
Detokenize()64   Detokenize() : detok_(TokenDatabase::Create<kTestDatabase>()) {}
65   Detokenizer detok_;
66 };
67 
// Tokens without arguments decode directly to their database strings.
TEST_F(Detokenize, NoFormatting) {
  EXPECT_EQ(detok_.Detokenize("\1\0\0\0"sv).BestString(), "One");
  EXPECT_EQ(detok_.Detokenize("\5\0\0\0"sv).BestString(), "TWO");
  EXPECT_EQ(detok_.Detokenize("\xff\x00\x00\x00"sv).BestString(), "333");
  EXPECT_EQ(detok_.Detokenize("\xff\xee\xee\xdd"sv).BestString(), "FOUR");
}

// Builds a detokenizer from an ELF file containing only the pw_tokenizer
// sections and verifies a known token decodes. See py/detokenize_test.py.
TEST_F(Detokenize, FromElfSection) {
  // Offset and size of the .pw_tokenizer.entries section in bytes.
  constexpr uint32_t database_offset_ = 0x00000174;
  constexpr size_t database_size_ = 0x000004C2;

  pw::span<const uint8_t> tokenEntries(
      reinterpret_cast<const uint8_t*>(test::ns::kElfSection.data() +
                                       database_offset_),
      database_size_);
  pw::Result<Detokenizer> detok_from_elf_ =
      Detokenizer::FromElfSection(tokenEntries);
  ASSERT_TRUE(detok_from_elf_.ok());
  EXPECT_EQ(detok_from_elf_->Detokenize("\xd6\x8c\x66\x2e").BestString(),
            "Jello, world!");
}

// Empty input has no token; the result is not ok and BestString is empty.
TEST_F(Detokenize, BestString_MissingToken_IsEmpty) {
  EXPECT_FALSE(detok_.Detokenize("").ok());
  EXPECT_TRUE(detok_.Detokenize("", 0u).BestString().empty());
}

// Inputs shorter than a full 4-byte token are zero-extended to 32 bits.
TEST_F(Detokenize, BestString_ShorterToken_ZeroExtended) {
  EXPECT_EQ(detok_.Detokenize("\x42", 1u).token(), 0x42u);
  EXPECT_EQ(detok_.Detokenize("\1\0"sv).token(), 0x1u);
  EXPECT_EQ(detok_.Detokenize("\1\0\3"sv).token(), 0x030001u);
  EXPECT_EQ(detok_.Detokenize("\0\0\0"sv).token(), 0x0u);
}

// Tokens absent from the database produce a not-ok result and empty string.
TEST_F(Detokenize, BestString_UnknownToken_IsEmpty) {
  EXPECT_FALSE(detok_.Detokenize("\0\0\0\0"sv).ok());
  EXPECT_TRUE(detok_.Detokenize("\0\0\0\0"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\2\0\0\0"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\x10\x32\x54\x76\x99"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\x98\xba\xdc\xfe"sv).BestString().empty());
}

// With no token present, BestStringWithErrors reports a decoding error.
TEST_F(Detokenize, BestStringWithErrors_MissingToken_ErrorMessage) {
  EXPECT_FALSE(detok_.Detokenize("").ok());
  EXPECT_EQ(detok_.Detokenize("", 0u).BestStringWithErrors(),
            ERR("missing token"));
}

// Zero-extended short tokens still match database entries.
TEST_F(Detokenize, BestStringWithErrors_ShorterTokenMatchesStrings) {
  EXPECT_EQ(detok_.Detokenize("\1", 1u).BestStringWithErrors(), "One");
  EXPECT_EQ(detok_.Detokenize("\1\0"sv).BestStringWithErrors(), "One");
  EXPECT_EQ(detok_.Detokenize("\1\0\0"sv).BestStringWithErrors(), "One");
}

// Unknown tokens are reported as errors with the hex token value.
TEST_F(Detokenize, BestStringWithErrors_UnknownToken_ErrorMessage) {
  ASSERT_FALSE(detok_.Detokenize("\0\0\0\0"sv).ok());
  EXPECT_EQ(detok_.Detokenize("\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000000"));
  EXPECT_EQ(detok_.Detokenize("\0\0\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000000"));
  EXPECT_EQ(detok_.Detokenize("\0\0\0\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000000"));
  EXPECT_EQ(detok_.Detokenize("\2\0\0\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000002"));
  EXPECT_EQ(detok_.Detokenize("\x10\x32\x54\x76\x99"sv).BestStringWithErrors(),
            ERR("unknown token 76543210"));
  EXPECT_EQ(detok_.Detokenize("\x98\xba\xdc\xfe"sv).BestStringWithErrors(),
            ERR("unknown token fedcba98"));
}

// Base64 versions of the tokens in kTestDatabase.
#define ONE "$AQAAAA=="
#define TWO "$BQAAAA=="
#define THREE "$/wAAAA=="
#define FOUR "$/+7u3Q=="
#define NEST_ONE "$7u7u7g=="

// Base64-encoded tokens embedded in text are replaced with their strings;
// malformed or unknown Base64 messages are left untouched.
TEST_F(Detokenize, Base64_NoArguments) {
  for (auto [data, expected] : TestCases(
           Case{ONE, "One"},
           Case{TWO, "TWO"},
           Case{THREE, "333"},
           Case{FOUR, "FOUR"},
           Case{FOUR ONE ONE, "FOUROneOne"},
           Case{ONE TWO THREE FOUR, "OneTWO333FOUR"},
           Case{ONE "\r\n" TWO "\r\n" THREE "\r\n" FOUR "\r\n",
                "One\r\nTWO\r\n333\r\nFOUR\r\n"},
           Case{"123" FOUR, "123FOUR"},
           Case{"123" FOUR ", 56", "123FOUR, 56"},
           Case{"12" THREE FOUR ", 56", "12333FOUR, 56"},
           Case{"$0" ONE, "$0One"},
           Case{"$/+7u3Q=", "$/+7u3Q="},  // incomplete message (missing "=")
           Case{"$123456==" FOUR, "$123456==FOUR"},
           Case{NEST_ONE, "One"},
           Case{NEST_ONE NEST_ONE NEST_ONE, "OneOneOne"},
           Case{FOUR "$" ONE NEST_ONE "?", "FOUR$OneOne?"})) {
    EXPECT_EQ(detok_.DetokenizeText(data), expected);
  }
}

// DecodeOptionallyTokenizedData handles raw binary tokens, Base64 text, and
// plain text, including recursively nested Base64 tokens.
TEST_F(Detokenize, OptionallyTokenizedData) {
  for (auto [data, expected] : TestCases(
           Case{ONE, "One"},
           Case{"\1\0\0\0", "One"},
           Case{TWO, "TWO"},
           Case{THREE, "333"},
           Case{FOUR, "FOUR"},
           Case{FOUR ONE ONE, "FOUROneOne"},
           Case{ONE TWO THREE FOUR, "OneTWO333FOUR"},
           Case{ONE "\r\n" TWO "\r\n" THREE "\r\n" FOUR "\r\n",
                "One\r\nTWO\r\n333\r\nFOUR\r\n"},
           Case{"123" FOUR, "123FOUR"},
           Case{"123" FOUR ", 56", "123FOUR, 56"},
           Case{"12" THREE FOUR ", 56", "12333FOUR, 56"},
           Case{"$0" ONE, "$0One"},
           Case{"$/+7u3Q=", "$/+7u3Q="},  // incomplete message (missing "=")
           Case{"$123456==" FOUR, "$123456==FOUR"},
           Case{NEST_ONE, "One"},
           Case{NEST_ONE NEST_ONE NEST_ONE, "OneOneOne"},
           Case{FOUR "$" ONE NEST_ONE "?", "FOUR$OneOne?"},
           Case{"$naeX+A==",
                "■msg♦This is One message■module♦■file♦file.txt"})) {
    EXPECT_EQ(detok_.DecodeOptionallyTokenizedData(as_bytes(span(data))),
              std::string(expected));
  }
}

// Database of format strings taking arguments, keyed by arbitrary tokens.
constexpr char kDataWithArguments[] =
    "TOKENS\0\0"
    "\x09\x00\x00\x00"  // Number of tokens in this database.
    "\0\0\0\0"
    "\x00\x00\x00\x00----"
    "\x0A\x0B\x0C\x0D----"
    "\x0E\x0F\x00\x01----"
    "\xAA\xAA\xAA\xAA----"
    "\xBB\xBB\xBB\xBB----"
    "\xCC\xCC\xCC\xCC----"
    "\xDD\xDD\xDD\xDD----"
    "\xEE\xEE\xEE\xEE----"
    "\xFF\xFF\xFF\xFF----"
    "\0"
    "Use the %s, %s.\0"
    "Now there are %d of %s!\0"
    "%c!\0"    // AA
    "%hhu!\0"  // BB
    "%hu!\0"   // CC
    "%u!\0"    // DD
    "%lu!\0"   // EE
    "%llu!";   // FF

221 constexpr TokenDatabase kWithArgs = TokenDatabase::Create<kDataWithArguments>();
222 class DetokenizeWithArgs : public ::testing::Test {
223  protected:
DetokenizeWithArgs()224   DetokenizeWithArgs() : detok_(kWithArgs) {}
225 
226   Detokenizer detok_;
227 };
228 
// A token absent from the database yields no matches.
TEST_F(DetokenizeWithArgs, NoMatches) {
  EXPECT_TRUE(detok_.Detokenize("\x23\xab\xc9\x87"sv).matches().empty());
}

// A token with exactly one database entry yields exactly one match.
TEST_F(DetokenizeWithArgs, SingleMatch) {
  EXPECT_EQ(detok_.Detokenize("\x00\x00\x00\x00"sv).matches().size(), 1u);
}

// Token 0 maps to the empty string in kDataWithArguments.
TEST_F(DetokenizeWithArgs, Empty) {
  EXPECT_EQ(detok_.Detokenize("\x00\x00\x00\x00"sv).BestString(), "");
}

// Tokens followed by varint-encoded arguments format successfully.
TEST_F(DetokenizeWithArgs, Successful) {
  // Run through test cases, but don't include cases that use %hhu or %llu since
  // these are not currently supported in arm-none-eabi-gcc.
  for (auto [data, expected] : TestCases(
           Case{"\x0A\x0B\x0C\x0D\5force\4Luke"sv, "Use the force, Luke."},
           Case{"\x0E\x0F\x00\x01\4\4them"sv, "Now there are 2 of them!"},
           Case{"\xAA\xAA\xAA\xAA\xfc\x01"sv, "~!"},
           Case{"\xCC\xCC\xCC\xCC\xfe\xff\x07"sv, "65535!"},
           Case{"\xDD\xDD\xDD\xDD\xfe\xff\x07"sv, "65535!"},
           Case{"\xDD\xDD\xDD\xDD\xfe\xff\xff\xff\x1f"sv, "4294967295!"},
           Case{"\xEE\xEE\xEE\xEE\xfe\xff\x07"sv, "65535!"},
           Case{"\xEE\xEE\xEE\xEE\xfe\xff\xff\xff\x1f"sv, "4294967295!"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

// Leftover bytes after decoding all arguments make the result not ok.
TEST_F(DetokenizeWithArgs, ExtraDataError) {
  auto error = detok_.Detokenize("\x00\x00\x00\x00MORE data"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ("", error.BestString());
}

// Missing arguments leave the %-specifier in BestString and are flagged in
// BestStringWithErrors.
TEST_F(DetokenizeWithArgs, MissingArgumentError) {
  auto error = detok_.Detokenize("\x0A\x0B\x0C\x0D\5force"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ(error.BestString(), "Use the force, %s.");
  EXPECT_EQ(error.BestStringWithErrors(),
            "Use the force, " ERR("%s MISSING") ".");
}

// An undecodable argument marks that specifier as an error and skips the rest.
TEST_F(DetokenizeWithArgs, DecodingError) {
  auto error = detok_.Detokenize("\x0E\x0F\x00\x01\xFF"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ(error.BestString(), "Now there are %d of %s!");
  EXPECT_EQ(error.BestStringWithErrors(),
            "Now there are " ERR("%d ERROR") " of " ERR("%s SKIPPED") "!");
}

279 constexpr char kDataWithCollisions[] =
280     "TOKENS\0\0"
281     "\x0F\x00\x00\x00"
282     "\0\0\0\0"
283     "\x00\x00\x00\x00\xff\xff\xff\xff"  // 1
284     "\x00\x00\x00\x00\x01\x02\x03\x04"  // 2
285     "\x00\x00\x00\x00\xff\xff\xff\xff"  // 3
286     "\x00\x00\x00\x00\xff\xff\xff\xff"  // 4
287     "\x00\x00\x00\x00\xff\xff\xff\xff"  // 5
288     "\x00\x00\x00\x00\xff\xff\xff\xff"  // 6
289     "\x00\x00\x00\x00\xff\xff\xff\xff"  // 7
290     "\xAA\xAA\xAA\xAA\x00\x00\x00\x00"  // 8
291     "\xAA\xAA\xAA\xAA\xff\xff\xff\xff"  // 9
292     "\xBB\xBB\xBB\xBB\xff\xff\xff\xff"  // A
293     "\xBB\xBB\xBB\xBB\xff\xff\xff\xff"  // B
294     "\xCC\xCC\xCC\xCC\xff\xff\xff\xff"  // C
295     "\xCC\xCC\xCC\xCC\xff\xff\xff\xff"  // D
296     "\xDD\xDD\xDD\xDD\xff\xff\xff\xff"  // E
297     "\xDD\xDD\xDD\xDD\xff\xff\xff\xff"  // F
298     // String table
299     "This string is present\0"   // 1
300     "This string is removed\0"   // 2
301     "One arg %d\0"               // 3
302     "One arg %s\0"               // 4
303     "Two args %s %u\0"           // 5
304     "Two args %s %s %% %% %%\0"  // 6
305     "Four args %d %d %d %d\0"    // 7
306     "This one is removed\0"      // 8
307     "This one is present\0"      // 9
308     "Two ints %d %d\0"           // A
309     "Three ints %d %d %d\0"      // B
310     "Three strings %s %s %s\0"   // C
311     "Two strings %s %s\0"        // D
312     "Three %s %s %s\0"           // E
313     "Five %d %d %d %d %s\0";     // F
314 
315 constexpr TokenDatabase kWithCollisions =
316     TokenDatabase::Create<kDataWithCollisions>();
317 
318 class DetokenizeWithCollisions : public ::testing::Test {
319  protected:
DetokenizeWithCollisions()320   DetokenizeWithCollisions() : detok_(kWithCollisions) {}
321 
322   Detokenizer detok_;
323 };
324 
// Among colliding entries, an entry that decodes without errors wins.
TEST_F(DetokenizeWithCollisions, Collision_AlwaysPreferSuccessfulDecode) {
  for (auto [data, expected] :
       TestCases(Case{"\0\0\0\0"sv, "This string is present"},
                 Case{"\0\0\0\0\x01"sv, "One arg -1"},
                 Case{"\0\0\0\0\x80"sv, "One arg [...]"},
                 Case{"\0\0\0\0\4Hey!\x04"sv, "Two args Hey! 2"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

// Entries that consume every input byte are preferred over those that don't.
TEST_F(DetokenizeWithCollisions, Collision_PreferDecodingAllBytes) {
  for (auto [data, expected] :
       TestCases(Case{"\0\0\0\0\x80\x80\x80\x80\x00"sv, "Two args [...] 0"},
                 Case{"\0\0\0\0\x08?"sv, "One arg %s"},
                 Case{"\0\0\0\0\x01!\x01\x80"sv, "Two args ! \x80 % % %"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

// When no entry decodes cleanly, the one with the fewest errors wins.
TEST_F(DetokenizeWithCollisions, Collision_PreferFewestDecodingErrors) {
  for (auto [data, expected] :
       TestCases(Case{"\xBB\xBB\xBB\xBB\x00"sv, "Two ints 0 %d"},
                 Case{"\xCC\xCC\xCC\xCC\2Yo\5?"sv, "Two strings Yo %s"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

// With equal error counts, entries that decoded more arguments rank first.
TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs) {
  auto result = detok_.Detokenize("\xDD\xDD\xDD\xDD\x01\x02\x01\x04\x05"sv);
  EXPECT_EQ((std::string_view)result.matches()[0].value(), "Five -1 1 -1 2 %s");
  EXPECT_EQ((std::string_view)result.matches()[1].value(), "Three \2 \4 %s"sv);
}

// Literal %% specifiers do not count toward the decoded-argument tally.
TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs_NoPercent) {
  // The "Two args %s %s ..." string successfully decodes this, and has more
  // "arguments", because of %%, but %% doesn't count as as a decoded argument.
  EXPECT_EQ(detok_.Detokenize("\0\0\0\0\x01\x00\x01\x02"sv).BestString(),
            "Four args -1 0 -1 1");
}

// Entries still present in the database outrank date-removed entries.
TEST_F(DetokenizeWithCollisions, Collision_PreferStillPresentString) {
  for (auto [data, expected] :
       TestCases(Case{"\x00\x00\x00\x00"sv, "This string is present"},
                 Case{"\xAA\xAA\xAA\xAA"sv, "This one is present"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

// Every entry sharing the token value is recorded as a match.
TEST_F(DetokenizeWithCollisions, Collision_TracksAllMatches) {
  auto result = detok_.Detokenize("\0\0\0\0"sv);
  EXPECT_EQ(result.matches().size(), 7u);
}

}  // namespace
}  // namespace pw::tokenizer