1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14
15 // decode.h defines classes that implement tokenized string decoding. These
16 // classes should not be used directly; instead decode tokenized messages with
17 // the Detokenizer class, defined in pw_tokenizer/detokenize.h.
18 #pragma once
19
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "pw_span/span.h"
29
// Decoding errors are marked with a prefix and a suffix so that they stand out
// from the rest of the decoded string. These macros are used to build decoding
// error strings.
//
// PW_TOKENIZER_ARG_DECODING_ERROR(message) places `message` between the prefix
// and suffix literals and relies on adjacent string literal concatenation, so
// `message` must itself be a string literal. For example,
// PW_TOKENIZER_ARG_DECODING_ERROR("missing") expands to "<[" "missing" "]>",
// which the compiler joins into "<[missing]>".
#define PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX "<["
#define PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX "]>"
#define PW_TOKENIZER_ARG_DECODING_ERROR(message) \
  PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX message \
  PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX
38
39 namespace pw::tokenizer {
40
// Records the outcome of decoding one argument of an encoded tokenized string.
// The numeric values of the codes should stay in sync with the DecodedArg
// class in decode.py.
class ArgStatus {
 public:
  // Problems that can arise while decoding a tokenized string argument. Every
  // code occupies its own bit, so a single ArgStatus can record several
  // problems at once.
  enum Code : unsigned {
    kOk = 0,                 // Decoding was successful.
    kMissing = 1u << 0,      // The argument was not present in the data.
    kTruncated = 1u << 1,    // The argument was truncated during encoding.
    kDecodeError = 1u << 2,  // An error occurred while decoding the argument.
    kSkipped = 1u << 3,      // Argument was skipped due to a previous error.
  };

  // Implicitly constructible from a single Code; defaults to kOk.
  constexpr ArgStatus(Code code = kOk) : status_(code) {}

  // Merges the bits of another status into this one. Bits are only ever added,
  // never cleared.
  constexpr void Update(ArgStatus status) { status_ |= status.status_; }

  // True if no decoding errors occurred. A truncated argument still counts as
  // OK because encoding and decoding both succeed when a string is truncated,
  // so the kTruncated bit is masked out before checking for remaining bits.
  constexpr bool ok() const {
    return (status_ & ~static_cast<unsigned>(kTruncated)) == 0u;
  }

  // Checks whether the given flag is set in this status. Always false for kOk,
  // which has no bits.
  constexpr bool HasError(Code code) const {
    return (status_ & static_cast<unsigned>(code)) != 0u;
  }

 private:
  // Several Code bits may be set at once, so the status is kept as a plain
  // unsigned rather than as a Code.
  unsigned status_;
};
73
74 // An argument decoded from an encoded tokenized message.
75 class DecodedArg {
76 public:
77 // Constructs a DecodedArg from a decoded value. The value is formatted into a
78 // string using the provided format string. The number of bytes that were
79 // decoded to get the value are provided in raw_size_bytes.
80 template <typename ArgumentType>
81 static DecodedArg FromValue(const char* format_string,
82 ArgumentType value,
83 size_t raw_size_bytes,
84 ArgStatus arg_status = ArgStatus::kOk);
85
86 // Constructs a DecodedArg that represents a string literal in the format
87 // string (plain text or % character).
DecodedArg(const std::string & literal)88 DecodedArg(const std::string& literal)
89 : value_(literal), raw_data_size_bytes_(0) {}
90
91 // Constructs a DecodedArg that encountered an error during decoding.
92 DecodedArg(ArgStatus error,
93 const std::string_view& spec,
94 size_t raw_size_bytes = 0u,
95 const std::string_view& value = {});
96
97 // This argument's value as a string. If an error occurred while decoding this
98 // argument, value() will be an error message.
value()99 const std::string& value() const { return value_; }
100
101 // Returns the conversion specification for this argument (e.g. %02x). This is
102 // empty for literals or "%%".
spec()103 const std::string& spec() const { return spec_; }
104
105 // True if this argument decoded successfully.
ok()106 bool ok() const { return status_.ok(); }
107
108 // How many bytes this arg occupied in the encoded arguments.
raw_size_bytes()109 size_t raw_size_bytes() const { return raw_data_size_bytes_; }
110
111 private:
DecodedArg(const char * format,size_t raw_size_bytes,ArgStatus status)112 DecodedArg(const char* format, size_t raw_size_bytes, ArgStatus status)
113 : spec_(format), raw_data_size_bytes_(raw_size_bytes), status_(status) {}
114
115 std::string value_;
116 std::string spec_;
117 size_t raw_data_size_bytes_;
118 ArgStatus status_;
119 };
120
121 // Represents a segment of a printf-style format string. Each StringSegment
122 // contains either literal text or a format specifier.
123 class StringSegment {
124 public:
125 // Parses a format specifier from the text and returns a StringSegment that
126 // represents it. Returns an empty StringSegment if no valid format specifier
127 // was found.
128 static StringSegment ParseFormatSpec(const char* format);
129
130 // Creates a StringSegment that represents a piece of plain text.
StringSegment(const std::string_view & text)131 StringSegment(const std::string_view& text) : StringSegment(text, kLiteral) {}
132
133 // Returns the DecodedArg with this StringSegment decoded according to the
134 // provided arguments.
135 DecodedArg Decode(const span<const uint8_t>& arguments) const;
136
137 // Skips decoding this StringSegment. Literals and %% are expanded as normal.
138 DecodedArg Skip() const;
139
empty()140 bool empty() const { return text_.empty(); }
141
text()142 const std::string& text() const { return text_; }
143
144 private:
145 enum Type {
146 kLiteral,
147 kPercent, // %% format specifier
148 kString,
149 kSignedInt,
150 kUnsigned32,
151 kUnsigned64,
152 kFloatingPoint,
153 };
154
155 // Varargs-promoted size of args on this machine; only needed for ints or %p.
156 enum ArgSize : bool { k32Bit, k64Bit };
157
158 template <typename T>
VarargSize()159 static constexpr ArgSize VarargSize() {
160 return sizeof(T) == sizeof(int64_t) ? k64Bit : k32Bit;
161 }
162
163 static ArgSize VarargSize(std::array<char, 2> length, char spec);
164
StringSegment()165 StringSegment() : type_(kLiteral) {}
166
StringSegment(const std::string_view & text,Type type)167 StringSegment(const std::string_view& text, Type type)
168 : StringSegment(text, type, VarargSize<void*>()) {}
169
StringSegment(const std::string_view & text,Type type,ArgSize local_size)170 StringSegment(const std::string_view& text, Type type, ArgSize local_size)
171 : text_(text), type_(type), local_size_(local_size) {}
172
173 DecodedArg DecodeString(const span<const uint8_t>& arguments) const;
174
175 DecodedArg DecodeInteger(const span<const uint8_t>& arguments) const;
176
177 DecodedArg DecodeFloatingPoint(const span<const uint8_t>& arguments) const;
178
179 std::string text_;
180 Type type_;
181 ArgSize local_size_; // Arg size to use for snprintf on this machine.
182 };
183
184 // The result of decoding a tokenized message with a FormatString. Stores
185 // decoded arguments and whether there was any undecoded data. This is returned
186 // from a FormatString::Format call.
187 class DecodedFormatString {
188 public:
DecodedFormatString(std::vector<DecodedArg> && segments,size_t remaining_bytes)189 DecodedFormatString(std::vector<DecodedArg>&& segments,
190 size_t remaining_bytes)
191 : segments_(std::move(segments)), remaining_bytes_(remaining_bytes) {}
192
193 DecodedFormatString(const DecodedFormatString&) = default;
194 DecodedFormatString(DecodedFormatString&&) = default;
195
196 DecodedFormatString& operator=(const DecodedFormatString&) = default;
197 DecodedFormatString& operator=(DecodedFormatString&&) = default;
198
199 // Returns the decoded format string. If any argument decoding errors
200 // occurred, the % conversion specifiers are included unmodified.
201 std::string value() const;
202
203 // Returns the decoded format string, with error messages for any arguments
204 // that failed to decode.
205 std::string value_with_errors() const;
206
ok()207 bool ok() const { return remaining_bytes() == 0u && decoding_errors() == 0u; }
208
209 // Returns the number of bytes that remained after decoding.
remaining_bytes()210 size_t remaining_bytes() const { return remaining_bytes_; }
211
212 // Returns the number of arguments in the format string. %% is not included.
213 size_t argument_count() const;
214
215 // Returns the number of arguments that failed to decode.
216 size_t decoding_errors() const;
217
218 private:
219 std::vector<DecodedArg> segments_;
220 size_t remaining_bytes_;
221 };
222
223 // Represents a printf-style format string. The string is stored as a vector of
224 // StringSegments.
225 class FormatString {
226 public:
227 // Constructs a FormatString from a null-terminated format string.
228 FormatString(const char* format_string);
229
230 // Formats this format string according to the provided encoded arguments and
231 // returns a string.
232 DecodedFormatString Format(span<const uint8_t> arguments) const;
233
Format(const std::string_view & arguments)234 DecodedFormatString Format(const std::string_view& arguments) const {
235 return Format(span(reinterpret_cast<const uint8_t*>(arguments.data()),
236 arguments.size()));
237 }
238
239 private:
240 std::vector<StringSegment> segments_;
241 };
242
243 // Implementation of DecodedArg::FromValue template function.
244 template <typename ArgumentType>
FromValue(const char * format,ArgumentType value,size_t raw_size_bytes,ArgStatus status)245 DecodedArg DecodedArg::FromValue(const char* format,
246 ArgumentType value,
247 size_t raw_size_bytes,
248 ArgStatus status) {
249 DecodedArg arg(format, raw_size_bytes, status);
250 const int value_size = std::snprintf(nullptr, 0u, format, value);
251
252 if (value_size < 0) {
253 arg.status_.Update(ArgStatus::kDecodeError);
254 return arg;
255 }
256
257 // Reserve space in the value string for the snprintf call.
258 arg.value_.append(value_size + 1, '\0');
259
260 // Print the value to the string in the reserved space, then pop off the \0.
261 std::snprintf(arg.value_.data(), arg.value_.size(), format, value);
262 arg.value_.pop_back(); // Remove the trailing \0.
263
264 return arg;
265 }
266
267 } // namespace pw::tokenizer
268