// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

#include "pw_tokenizer/detokenize.h"

#include <array>
#include <string_view>
#include <utility>

#include "gtest/gtest.h"

namespace pw::tokenizer {
namespace {

using namespace std::literals::string_view_literals;

// Use a shorter name for the error string macro.
#define ERR PW_TOKENIZER_ARG_DECODING_ERROR

// Use alignas to ensure that the data is properly aligned to be read from a
// token database entry struct. This avoids unaligned memory reads.
alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
    "TOKENS\0\0"
    "\x04\x00\x00\x00"
    "\0\0\0\0"
    "\x01\x00\x00\x00----"
    "\x05\x00\x00\x00----"
    "\xFF\x00\x00\x00----"
    "\xFF\xEE\xEE\xDD----"
    "One\0"
    "TWO\0"
    "333\0"
    "FOUR";
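
// For reference, kBasicData is laid out per the binary token database format
// assumed by these tests (see pw_tokenizer's token database docs for the
// authoritative definition): an 8-byte "TOKENS\0\0" header, a 4-byte
// little-endian entry count (4), 4 reserved bytes, then one 8-byte entry per
// token (a 4-byte little-endian token followed by 4 bytes of removal-date
// metadata; "----" is arbitrary filler), and finally the null-terminated
// strings in entry order.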

class Detokenize : public ::testing::Test {
 protected:
  Detokenize() : detok_(TokenDatabase::Create<kBasicData>()) {}
  Detokenizer detok_;
};
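
// Illustrative usage sketch (not part of the tests): construct a Detokenizer
// from a database and query the result. Only API exercised in this file is
// used; PrintLine is a hypothetical output function.
//
//   Detokenizer detokenizer(TokenDatabase::Create<kBasicData>());
//   auto result = detokenizer.Detokenize("\1\0\0\0"sv);
//   if (result.ok()) {
//     PrintLine(result.BestString());  // Prints "One".
//   }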

TEST_F(Detokenize, NoFormatting) {
  EXPECT_EQ(detok_.Detokenize("\1\0\0\0"sv).BestString(), "One");
  EXPECT_EQ(detok_.Detokenize("\5\0\0\0"sv).BestString(), "TWO");
  EXPECT_EQ(detok_.Detokenize("\xff\x00\x00\x00"sv).BestString(), "333");
  EXPECT_EQ(detok_.Detokenize("\xff\xee\xee\xdd"sv).BestString(), "FOUR");
}

TEST_F(Detokenize, BestString_MissingToken_IsEmpty) {
  EXPECT_FALSE(detok_.Detokenize("").ok());
  EXPECT_TRUE(detok_.Detokenize("", 0u).BestString().empty());
}

TEST_F(Detokenize, BestString_ShorterToken_ZeroExtended) {
  EXPECT_EQ(detok_.Detokenize("\x42", 1u).token(), 0x42u);
  EXPECT_EQ(detok_.Detokenize("\1\0"sv).token(), 0x1u);
  EXPECT_EQ(detok_.Detokenize("\1\0\3"sv).token(), 0x030001u);
  EXPECT_EQ(detok_.Detokenize("\0\0\0"sv).token(), 0x0u);
}

TEST_F(Detokenize, BestString_UnknownToken_IsEmpty) {
  EXPECT_FALSE(detok_.Detokenize("\0\0\0\0"sv).ok());
  EXPECT_TRUE(detok_.Detokenize("\0\0\0\0"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\2\0\0\0"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\x10\x32\x54\x76\x99"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\x98\xba\xdc\xfe"sv).BestString().empty());
}

TEST_F(Detokenize, BestStringWithErrors_MissingToken_ErrorMessage) {
  EXPECT_FALSE(detok_.Detokenize("").ok());
  EXPECT_EQ(detok_.Detokenize("", 0u).BestStringWithErrors(),
            ERR("missing token"));
}

TEST_F(Detokenize, BestStringWithErrors_ShorterTokenMatchesStrings) {
  EXPECT_EQ(detok_.Detokenize("\1", 1u).BestStringWithErrors(), "One");
  EXPECT_EQ(detok_.Detokenize("\1\0"sv).BestStringWithErrors(), "One");
  EXPECT_EQ(detok_.Detokenize("\1\0\0"sv).BestStringWithErrors(), "One");
}

TEST_F(Detokenize, BestStringWithErrors_UnknownToken_ErrorMessage) {
  ASSERT_FALSE(detok_.Detokenize("\0\0\0\0"sv).ok());
  EXPECT_EQ(detok_.Detokenize("\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000000"));
  EXPECT_EQ(detok_.Detokenize("\0\0\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000000"));
  EXPECT_EQ(detok_.Detokenize("\0\0\0\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000000"));
  EXPECT_EQ(detok_.Detokenize("\2\0\0\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000002"));
  EXPECT_EQ(detok_.Detokenize("\x10\x32\x54\x76\x99"sv).BestStringWithErrors(),
            ERR("unknown token 76543210"));
  EXPECT_EQ(detok_.Detokenize("\x98\xba\xdc\xfe"sv).BestStringWithErrors(),
            ERR("unknown token fedcba98"));
}

alignas(TokenDatabase::RawEntry) constexpr char kDataWithArguments[] =
    "TOKENS\0\0"
    "\x09\x00\x00\x00"
    "\0\0\0\0"
    "\x00\x00\x00\x00----"
    "\x0A\x0B\x0C\x0D----"
    "\x0E\x0F\x00\x01----"
    "\xAA\xAA\xAA\xAA----"
    "\xBB\xBB\xBB\xBB----"
    "\xCC\xCC\xCC\xCC----"
    "\xDD\xDD\xDD\xDD----"
    "\xEE\xEE\xEE\xEE----"
    "\xFF\xFF\xFF\xFF----"
    "\0"
    "Use the %s, %s.\0"
    "Now there are %d of %s!\0"
    "%c!\0"    // AA
    "%hhu!\0"  // BB
    "%hu!\0"   // CC
    "%u!\0"    // DD
    "%lu!\0"   // EE
    "%llu!";   // FF

constexpr TokenDatabase kWithArgs = TokenDatabase::Create<kDataWithArguments>();
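
// Note on the encoded payloads in the tests below: integer arguments are
// expected to be varint-encoded with zig-zag encoding (e.g. "\4" decodes to 2
// and "\xfc\x01" to 126, i.e. '~'), and string arguments carry a one-byte
// length prefix (e.g. "\5force" is the 5-character string "force"). This
// mirrors pw_tokenizer's argument encoding; see the tokenizer docs for
// details.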

using Case = std::pair<std::string_view, std::string_view>;

template <typename... Args>
auto TestCases(Args... args) {
  return std::array<Case, sizeof...(Args)>{args...};
}

class DetokenizeWithArgs : public ::testing::Test {
 protected:
  DetokenizeWithArgs() : detok_(kWithArgs) {}

  Detokenizer detok_;
};

TEST_F(DetokenizeWithArgs, NoMatches) {
  EXPECT_TRUE(detok_.Detokenize("\x23\xab\xc9\x87"sv).matches().empty());
}

TEST_F(DetokenizeWithArgs, SingleMatch) {
  EXPECT_EQ(detok_.Detokenize("\x00\x00\x00\x00"sv).matches().size(), 1u);
}

TEST_F(DetokenizeWithArgs, Empty) {
  EXPECT_EQ(detok_.Detokenize("\x00\x00\x00\x00"sv).BestString(), "");
}

TEST_F(DetokenizeWithArgs, Successful) {
  // Run through test cases, but skip cases that use %hhu or %llu, since these
  // are not currently supported by arm-none-eabi-gcc.
  for (auto [data, expected] : TestCases(
           Case{"\x0A\x0B\x0C\x0D\5force\4Luke"sv, "Use the force, Luke."},
           Case{"\x0E\x0F\x00\x01\4\4them"sv, "Now there are 2 of them!"},
           Case{"\xAA\xAA\xAA\xAA\xfc\x01"sv, "~!"},
           Case{"\xCC\xCC\xCC\xCC\xfe\xff\x07"sv, "65535!"},
           Case{"\xDD\xDD\xDD\xDD\xfe\xff\x07"sv, "65535!"},
           Case{"\xDD\xDD\xDD\xDD\xfe\xff\xff\xff\x1f"sv, "4294967295!"},
           Case{"\xEE\xEE\xEE\xEE\xfe\xff\x07"sv, "65535!"},
           Case{"\xEE\xEE\xEE\xEE\xfe\xff\xff\xff\x1f"sv, "4294967295!"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

TEST_F(DetokenizeWithArgs, ExtraDataError) {
  auto error = detok_.Detokenize("\x00\x00\x00\x00MORE data"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ("", error.BestString());
}

TEST_F(DetokenizeWithArgs, MissingArgumentError) {
  auto error = detok_.Detokenize("\x0A\x0B\x0C\x0D\5force"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ(error.BestString(), "Use the force, %s.");
  EXPECT_EQ(error.BestStringWithErrors(),
            "Use the force, " ERR("%s MISSING") ".");
}

TEST_F(DetokenizeWithArgs, DecodingError) {
  auto error = detok_.Detokenize("\x0E\x0F\x00\x01\xFF"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ(error.BestString(), "Now there are %d of %s!");
  EXPECT_EQ(error.BestStringWithErrors(),
            "Now there are " ERR("%d ERROR") " of " ERR("%s SKIPPED") "!");
}

alignas(TokenDatabase::RawEntry) constexpr char kDataWithCollisions[] =
    "TOKENS\0\0"
    "\x0F\x00\x00\x00"
    "\0\0\0\0"
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 1
    "\x00\x00\x00\x00\x01\x02\x03\x04"  // 2
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 3
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 4
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 5
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 6
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 7
    "\xAA\xAA\xAA\xAA\x00\x00\x00\x00"  // 8
    "\xAA\xAA\xAA\xAA\xff\xff\xff\xff"  // 9
    "\xBB\xBB\xBB\xBB\xff\xff\xff\xff"  // A
    "\xBB\xBB\xBB\xBB\xff\xff\xff\xff"  // B
    "\xCC\xCC\xCC\xCC\xff\xff\xff\xff"  // C
    "\xCC\xCC\xCC\xCC\xff\xff\xff\xff"  // D
    "\xDD\xDD\xDD\xDD\xff\xff\xff\xff"  // E
    "\xDD\xDD\xDD\xDD\xff\xff\xff\xff"  // F
    // String table
    "This string is present\0"   // 1
    "This string is removed\0"   // 2
    "One arg %d\0"               // 3
    "One arg %s\0"               // 4
    "Two args %s %u\0"           // 5
    "Two args %s %s %% %% %%\0"  // 6
    "Four args %d %d %d %d\0"    // 7
    "This one is removed\0"      // 8
    "This one is present\0"      // 9
    "Two ints %d %d\0"           // A
    "Three ints %d %d %d\0"      // B
    "Three strings %s %s %s\0"   // C
    "Two strings %s %s\0"        // D
    "Three %s %s %s\0"           // E
    "Five %d %d %d %d %s\0";     // F

constexpr TokenDatabase kWithCollisions =
    TokenDatabase::Create<kDataWithCollisions>();
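
// The tests below exercise how colliding tokens are ranked when selecting
// BestString(). The preference order, as inferred from the test names and
// expectations (not a normative specification): successful decodes beat
// failed ones, decoding all bytes beats leaving data unconsumed, fewer
// decoding errors beat more, more decoded arguments beat fewer, and strings
// still present in the database beat removed ones.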

class DetokenizeWithCollisions : public ::testing::Test {
 protected:
  DetokenizeWithCollisions() : detok_(kWithCollisions) {}

  Detokenizer detok_;
};

TEST_F(DetokenizeWithCollisions, Collision_AlwaysPreferSuccessfulDecode) {
  for (auto [data, expected] :
       TestCases(Case{"\0\0\0\0"sv, "This string is present"},
                 Case{"\0\0\0\0\x01"sv, "One arg -1"},
                 Case{"\0\0\0\0\x80"sv, "One arg [...]"},
                 Case{"\0\0\0\0\4Hey!\x04"sv, "Two args Hey! 2"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

TEST_F(DetokenizeWithCollisions, Collision_PreferDecodingAllBytes) {
  for (auto [data, expected] :
       TestCases(Case{"\0\0\0\0\x80\x80\x80\x80\x00"sv, "Two args [...] 0"},
                 Case{"\0\0\0\0\x08?"sv, "One arg %s"},
                 Case{"\0\0\0\0\x01!\x01\x80"sv, "Two args ! \x80 % % %"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

TEST_F(DetokenizeWithCollisions, Collision_PreferFewestDecodingErrors) {
  for (auto [data, expected] :
       TestCases(Case{"\xBB\xBB\xBB\xBB\x00"sv, "Two ints 0 %d"},
                 Case{"\xCC\xCC\xCC\xCC\2Yo\5?"sv, "Two strings Yo %s"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}

TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs) {
  auto result = detok_.Detokenize("\xDD\xDD\xDD\xDD\x01\x02\x01\x04\x05"sv);
  EXPECT_EQ((std::string_view)result.matches()[0].value(), "Five -1 1 -1 2 %s");
  EXPECT_EQ((std::string_view)result.matches()[1].value(), "Three \2 \4 %s"sv);
}

TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs_NoPercent) {
  // The "Two args %s %s ..." string successfully decodes this, and has more
  // "arguments" because of the %%, but %% doesn't count as a decoded argument.
  EXPECT_EQ(detok_.Detokenize("\0\0\0\0\x01\x00\x01\x02"sv).BestString(),
            "Four args -1 0 -1 1");
}
279 
TEST_F(DetokenizeWithCollisions,Collision_PreferStillPresentString)280 TEST_F(DetokenizeWithCollisions, Collision_PreferStillPresentString) {
281   for (auto [data, expected] :
282        TestCases(Case{"\x00\x00\x00\x00"sv, "This string is present"},
283                  Case{"\xAA\xAA\xAA\xAA"sv, "This one is present"})) {
284     EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
285   }
286 }
287 
TEST_F(DetokenizeWithCollisions,Collision_TracksAllMatches)288 TEST_F(DetokenizeWithCollisions, Collision_TracksAllMatches) {
289   auto result = detok_.Detokenize("\0\0\0\0"sv);
290   EXPECT_EQ(result.matches().size(), 7u);
291 }
292 
293 }  // namespace
294 }  // namespace pw::tokenizer
295