• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 #include "pw_tokenizer/internal/decode.h"
16 
17 #include <algorithm>
18 #include <array>
19 #include <cctype>
20 #include <cstring>
21 
22 #include "pw_varint/varint.h"
23 
24 namespace pw::tokenizer {
25 namespace {
26 
27 // Functions for parsing a printf format specifier.
// Counts the printf flag characters ('-', '+', '#', ' ', and '0') found at the
// start of str. Returns 0 if str does not begin with a flag.
size_t SkipFlags(const char* str) {
  size_t count = 0;
  for (;; ++count) {
    switch (str[count]) {
      case '-':
      case '+':
      case '#':
      case ' ':
      case '0':
        // Still inside the run of flags; move on to the next character.
        continue;
      default:
        // Any other character, including the null terminator, ends the run.
        return count;
    }
  }
}
36 
// Returns the length of a printf field width or precision at the start of str.
// This is either a single '*' or an optional sign ('-' or '+') followed by
// zero or more decimal digits. Returns 0 if neither is present.
size_t SkipAsteriskOrInteger(const char* str) {
  if (str[0] == '*') {
    return 1;
  }

  size_t i = (str[0] == '-' || str[0] == '+') ? 1 : 0;

  // std::isdigit has undefined behavior for negative arguments other than EOF.
  // Plain char may be signed, so convert to unsigned char before classifying.
  while (std::isdigit(static_cast<unsigned char>(str[i])) != 0) {
    i += 1;
  }
  return i;
}
49 
// Reads the printf length modifier at the start of str. Returns the modifier's
// characters: {'l', 'l'} or {'h', 'h'} for the two-character modifiers, a
// single character followed by '\0' for h, l, j, z, t, and L, and all zeros if
// str does not start with a length modifier.
std::array<char, 2> ReadLengthModifier(const char* str) {
  const char first = str[0];

  // An empty string has no modifier. Returning here also guarantees that str[1]
  // is never read when str[0] is the null terminator.
  if (first == '\0') {
    return {};
  }

  // Check for ll or hh. The first character is tested before str[1] is read.
  if ((first == 'l' || first == 'h') && str[1] == first) {
    return {first, first};
  }
  if (std::strchr("hljztL", first) != nullptr) {
    return {first};
  }
  return {};
}
60 
61 // Returns the error message that is used in place of a decoded arg when an
62 // error occurs.
ErrorMessage(ArgStatus status,const std::string_view & spec,const std::string_view & value)63 std::string ErrorMessage(ArgStatus status,
64                          const std::string_view& spec,
65                          const std::string_view& value) {
66   const char* message;
67   if (status.HasError(ArgStatus::kSkipped)) {
68     message = "SKIPPED";
69   } else if (status.HasError(ArgStatus::kMissing)) {
70     message = "MISSING";
71   } else if (status.HasError(ArgStatus::kDecodeError)) {
72     message = "ERROR";
73   } else {
74     message = "INTERNAL ERROR";
75   }
76 
77   std::string result(PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX);
78   result.append(spec);
79   result.push_back(' ');
80   result.append(message);
81 
82   if (!value.empty()) {
83     result.push_back(' ');
84     result.push_back('(');
85     result.append(value);
86     result.push_back(')');
87   }
88 
89   result.append(PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX);
90   return result;
91 }
92 
93 }  // namespace
94 
DecodedArg(ArgStatus error,const std::string_view & spec,size_t raw_size_bytes,const std::string_view & value)95 DecodedArg::DecodedArg(ArgStatus error,
96                        const std::string_view& spec,
97                        size_t raw_size_bytes,
98                        const std::string_view& value)
99     : value_(ErrorMessage(error, spec, value)),
100       spec_(spec),
101       raw_data_size_bytes_(raw_size_bytes),
102       status_(error) {}
103 
ParseFormatSpec(const char * format)104 StringSegment StringSegment::ParseFormatSpec(const char* format) {
105   if (format[0] != '%' || format[1] == '\0') {
106     return StringSegment();
107   }
108 
109   // Parse the format specifier.
110   size_t i = 1;
111 
112   // Skip the flags.
113   i += SkipFlags(&format[i]);
114 
115   // Skip the field width.
116   i += SkipAsteriskOrInteger(&format[i]);
117 
118   // Skip the precision.
119   if (format[i] == '.') {
120     i += 1;
121     i += SkipAsteriskOrInteger(&format[i]);
122   }
123 
124   // Read the length modifier.
125   const std::array<char, 2> length = ReadLengthModifier(&format[i]);
126   i += (length[0] == '\0' ? 0 : 1) + (length[1] == '\0' ? 0 : 1);
127 
128   // Read the conversion specifier.
129   const char spec = format[i];
130 
131   Type type;
132   if (spec == 's') {
133     type = kString;
134   } else if (spec == 'c' || spec == 'd' || spec == 'i') {
135     type = kSignedInt;
136   } else if (std::strchr("oxXup", spec) != nullptr) {
137     // The source size matters for unsigned integers because they need to be
138     // masked off to their correct length, since zig-zag decode sign extends.
139     // TODO(hepler): 64-bit targets likely have 64-bit l, j, z, and t. Also, p
140     // needs to be 64-bit on these targets.
141     type = length[0] == 'j' || length[1] == 'l' ? kUnsigned64 : kUnsigned32;
142   } else if (std::strchr("fFeEaAgG", spec) != nullptr) {
143     type = kFloatingPoint;
144   } else if (spec == '%' && i == 1) {
145     type = kPercent;
146   } else {
147     return StringSegment();
148   }
149 
150   return {std::string_view(format, i + 1), type, VarargSize(length, spec)};
151 }
152 
VarargSize(std::array<char,2> length,char spec)153 StringSegment::ArgSize StringSegment::VarargSize(std::array<char, 2> length,
154                                                  char spec) {
155   // Use pointer size for %p or any other type (for which this doesn't matter).
156   if (std::strchr("cdioxXu", spec) == nullptr) {
157     return VarargSize<void*>();
158   }
159   if (length[0] == 'l') {
160     return length[1] == 'l' ? VarargSize<long long>() : VarargSize<long>();
161   }
162   if (length[0] == 'j') {
163     return VarargSize<intmax_t>();
164   }
165   if (length[0] == 'z') {
166     return VarargSize<size_t>();
167   }
168   if (length[0] == 't') {
169     return VarargSize<ptrdiff_t>();
170   }
171   return VarargSize<int>();
172 }
173 
DecodeString(const span<const uint8_t> & arguments) const174 DecodedArg StringSegment::DecodeString(
175     const span<const uint8_t>& arguments) const {
176   if (arguments.empty()) {
177     return DecodedArg(ArgStatus::kMissing, text_);
178   }
179 
180   ArgStatus status =
181       (arguments[0] & 0x80u) == 0u ? ArgStatus::kOk : ArgStatus::kTruncated;
182 
183   const uint_fast8_t size = arguments[0] & 0x7Fu;
184 
185   if (arguments.size() - 1 < size) {
186     status.Update(ArgStatus::kDecodeError);
187     span<const uint8_t> arg_val = arguments.subspan(1);
188     return DecodedArg(
189         status,
190         text_,
191         arguments.size(),
192         {reinterpret_cast<const char*>(arg_val.data()), arg_val.size()});
193   }
194 
195   std::string value(reinterpret_cast<const char*>(arguments.data() + 1), size);
196 
197   if (status.HasError(ArgStatus::kTruncated)) {
198     value.append("[...]");
199   }
200 
201   return DecodedArg::FromValue(text_.c_str(), value.c_str(), 1 + size, status);
202 }
203 
DecodeInteger(const span<const uint8_t> & arguments) const204 DecodedArg StringSegment::DecodeInteger(
205     const span<const uint8_t>& arguments) const {
206   if (arguments.empty()) {
207     return DecodedArg(ArgStatus::kMissing, text_);
208   }
209 
210   int64_t value;
211   const size_t bytes = varint::Decode(as_bytes(arguments), &value);
212 
213   if (bytes == 0u) {
214     return DecodedArg(ArgStatus::kDecodeError,
215                       text_,
216                       std::min(varint::kMaxVarint64SizeBytes,
217                                static_cast<size_t>(arguments.size())));
218   }
219 
220   // Unsigned ints need to be masked to their bit width due to sign extension.
221   if (type_ == kUnsigned32) {
222     value &= 0xFFFFFFFFu;
223   }
224 
225   if (local_size_ == k32Bit) {
226     return DecodedArg::FromValue(
227         text_.c_str(), static_cast<uint32_t>(value), bytes);
228   }
229   return DecodedArg::FromValue(text_.c_str(), value, bytes);
230 }
231 
DecodeFloatingPoint(const span<const uint8_t> & arguments) const232 DecodedArg StringSegment::DecodeFloatingPoint(
233     const span<const uint8_t>& arguments) const {
234   static_assert(sizeof(float) == 4u);
235   if (arguments.size() < sizeof(float)) {
236     return DecodedArg(ArgStatus::kMissing, text_);
237   }
238 
239   float value;
240   std::memcpy(&value, arguments.data(), sizeof(value));
241   return DecodedArg::FromValue(text_.c_str(), value, sizeof(value));
242 }
243 
Decode(const span<const uint8_t> & arguments) const244 DecodedArg StringSegment::Decode(const span<const uint8_t>& arguments) const {
245   switch (type_) {
246     case kLiteral:
247       return DecodedArg(text_);
248     case kPercent:
249       return DecodedArg("%");
250     case kString:
251       return DecodeString(arguments);
252     case kSignedInt:
253     case kUnsigned32:
254     case kUnsigned64:
255       return DecodeInteger(arguments);
256     case kFloatingPoint:
257       return DecodeFloatingPoint(arguments);
258   }
259 
260   return DecodedArg(ArgStatus::kDecodeError, text_);
261 }
262 
Skip() const263 DecodedArg StringSegment::Skip() const {
264   switch (type_) {
265     case kLiteral:
266       return DecodedArg(text_);
267     case kPercent:
268       return DecodedArg("%");
269     case kString:
270     case kSignedInt:
271     case kUnsigned32:
272     case kUnsigned64:
273     case kFloatingPoint:
274     default:
275       return DecodedArg(ArgStatus::kSkipped, text_);
276   }
277 }
278 
value() const279 std::string DecodedFormatString::value() const {
280   std::string output;
281 
282   for (const DecodedArg& arg : segments_) {
283     output.append(arg.ok() ? arg.value() : arg.spec());
284   }
285 
286   return output;
287 }
288 
value_with_errors() const289 std::string DecodedFormatString::value_with_errors() const {
290   std::string output;
291 
292   for (const DecodedArg& arg : segments_) {
293     output.append(arg.value());
294   }
295 
296   return output;
297 }
298 
argument_count() const299 size_t DecodedFormatString::argument_count() const {
300   return std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) {
301     return !arg.spec().empty();
302   });
303 }
304 
decoding_errors() const305 size_t DecodedFormatString::decoding_errors() const {
306   return std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) {
307     return !arg.ok();
308   });
309 }
310 
FormatString(const char * format)311 FormatString::FormatString(const char* format) {
312   const char* text_start = format;
313 
314   while (format[0] != '\0') {
315     if (StringSegment spec = StringSegment::ParseFormatSpec(format);
316         !spec.empty()) {
317       // Add the text segment seen so far (if any).
318       if (text_start < format) {
319         segments_.emplace_back(
320             std::string_view(text_start, format - text_start));
321       }
322 
323       // Move along the index and text segment start.
324       format += spec.text().size();
325       text_start = format;
326 
327       // Add the format specifier that was just found.
328       segments_.push_back(std::move(spec));
329     } else {
330       format += 1;
331     }
332   }
333 
334   if (text_start < format) {
335     segments_.emplace_back(std::string_view(text_start, format - text_start));
336   }
337 }
338 
Format(span<const uint8_t> arguments) const339 DecodedFormatString FormatString::Format(span<const uint8_t> arguments) const {
340   std::vector<DecodedArg> results;
341   bool skip = false;
342 
343   for (const auto& segment : segments_) {
344     if (skip) {
345       results.push_back(segment.Skip());
346     } else {
347       results.push_back(segment.Decode(arguments));
348       arguments = arguments.subspan(results.back().raw_size_bytes());
349 
350       // If an error occurred, skip decoding the remaining arguments.
351       if (!results.back().ok()) {
352         skip = true;
353       }
354     }
355   }
356 
357   return DecodedFormatString(std::move(results), arguments.size());
358 }
359 
360 }  // namespace pw::tokenizer
361