• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 #pragma once
15 
16 #ifdef __cplusplus
17 
18 #include <cstddef>
19 #include <cstdint>
20 
21 #else
22 
23 #include <stddef.h>
24 #include <stdint.h>
25 
26 #endif  // __cplusplus
27 
28 #include "pw_polyfill/static_assert.h"
29 #include "pw_preprocessor/arguments.h"
30 #include "pw_preprocessor/compiler.h"
31 #include "pw_preprocessor/concat.h"
32 #include "pw_preprocessor/util.h"
33 #include "pw_tokenizer/internal/argument_types.h"
34 #include "pw_tokenizer/internal/tokenize_string.h"
35 
36 /// The type of the 32-bit token used in place of a string. Also available as
37 /// `pw::tokenizer::Token`.
38 typedef uint32_t pw_tokenizer_Token;
39 
40 // Strings may optionally be tokenized to a domain. Strings in different
41 // domains can be processed separately by the token database tools. Each domain
42 // in use must have a corresponding section declared in the linker script. See
43 // `pw_tokenizer_linker_sections.ld` for more details.
44 //
45 // The default domain is an empty string (C++: pw::tokenizer::kDefaultDomain).
46 #define PW_TOKENIZER_DEFAULT_DOMAIN ""
47 
48 /// Tokenizes a string literal, optionally in a specific domain. When called as
49 /// `(domain, string_literal)`, the string is tokenized in that domain; when
50 /// called as `(string_literal)`, the default domain is used.
51 #define PW_TOKENIZE_STRING_OPTIONAL_DOMAIN(...) \
52   PW_DELEGATE_BY_ARG_COUNT(_PW_TOKENIZE_STRING_OPTIONAL_DOMAIN_, __VA_ARGS__)
53 
54 #define _PW_TOKENIZE_STRING_OPTIONAL_DOMAIN_1(string_literal) \
55   PW_TOKENIZE_STRING(string_literal)
56 
57 #define _PW_TOKENIZE_STRING_OPTIONAL_DOMAIN_2(domain, string_literal) \
58   PW_TOKENIZE_STRING_DOMAIN(domain, string_literal)
59 
60 /// Converts a string literal to a `pw_tokenizer_Token` (`uint32_t`) token in a
61 /// standalone statement, using the default domain. C and C++ compatible. In
62 /// C++, the string may be a literal or a constexpr char array, including
63 /// function variables like `__func__`. In C, the argument must be a string
64 /// literal. In either case, the string must be null terminated, but may
65 /// contain any characters (including '\0').
66 ///
67 /// @code
68 ///
69 ///   constexpr uint32_t token = PW_TOKENIZE_STRING("Any string literal!");
70 ///
71 /// @endcode
72 #define PW_TOKENIZE_STRING(string_literal) \
73   PW_TOKENIZE_STRING_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN, string_literal)
74 
75 /// Converts a string literal to a `uint32_t` token within an expression, using
76 /// the default domain. Requires C++.
77 ///
78 /// @code
79 ///
80 ///   DoSomething(PW_TOKENIZE_STRING_EXPR("Succeed"));
81 ///
82 /// @endcode
83 #define PW_TOKENIZE_STRING_EXPR(string_literal)                               \
84   [&] {                                                                       \
85     constexpr uint32_t lambda_ret_token = PW_TOKENIZE_STRING(string_literal); \
86     return lambda_ret_token;                                                  \
87   }()
88 
89 /// Tokenizes a string literal in a standalone statement using the specified
90 /// @rstref{domain <module-pw_tokenizer-domains>}. C and C++ compatible.
91 #define PW_TOKENIZE_STRING_DOMAIN(domain, string_literal) \
92   PW_TOKENIZE_STRING_MASK(domain, UINT32_MAX /* keep all bits */, string_literal)
93 
94 /// Tokenizes a string literal using the specified @rstref{domain
95 /// <module-pw_tokenizer-domains>} within an expression. Requires C++.
96 #define PW_TOKENIZE_STRING_DOMAIN_EXPR(domain, string_literal) \
97   [&] {                                                        \
98     constexpr uint32_t lambda_ret_token =                      \
99         PW_TOKENIZE_STRING_DOMAIN(domain, string_literal);     \
100     return lambda_ret_token;                                   \
101   }()
102 
103 /// Tokenizes a string literal in a standalone statement using the specified
104 /// @rstref{domain <module-pw_tokenizer-domains>} and @rstref{bit mask
105 /// <module-pw_tokenizer-masks>}. C and C++ compatible.
106 #define PW_TOKENIZE_STRING_MASK(domain, mask, string_literal)                \
107   /* assign to a variable */ _PW_TOKENIZER_MASK_TOKEN(mask, string_literal); \
108                                                                              \
109   static_assert(0 < (mask) && (mask) <= UINT32_MAX,                          \
110                 "Tokenizer masks must be non-zero uint32_t values.");        \
111                                                                              \
112   PW_TOKENIZER_DEFINE_TOKEN(                                                 \
113       _PW_TOKENIZER_MASK_TOKEN(mask, string_literal), domain, string_literal)
114 
115 /// Tokenizes a string literal using the specified @rstref{domain
116 /// <module-pw_tokenizer-domains>} and @rstref{bit mask
117 /// <module-pw_tokenizer-masks>} within an expression. Requires C++.
118 #define PW_TOKENIZE_STRING_MASK_EXPR(domain, mask, string_literal) \
119   [&] {                                                            \
120     constexpr uint32_t lambda_ret_token =                          \
121         PW_TOKENIZE_STRING_MASK(domain, mask, string_literal);     \
122     return lambda_ret_token;                                       \
123   }()
124 
125 #define _PW_TOKENIZER_MASK_TOKEN(mask, string_literal) \
126   ((pw_tokenizer_Token)(mask) & PW_TOKENIZER_STRING_TOKEN(string_literal))
127 
128 /// Encodes a tokenized string and arguments to the provided buffer. The size of
129 /// the buffer is passed via a pointer to a `size_t`. After encoding is
130 /// complete, the `size_t` is set to the number of bytes written to the buffer.
131 ///
132 /// The macro's arguments are equivalent to the following function signature:
133 ///
134 /// @code
135 ///
136 ///   TokenizeToBuffer(void* buffer,
137 ///                    size_t* buffer_size_pointer,
138 ///                    const char* format,
139 ///                    ...);  // printf-style arguments
140 /// @endcode
141 ///
142 /// For example, the following encodes a tokenized string with a temperature to
143 /// a buffer. The buffer is passed to a function to send the message over a
144 /// UART.
145 ///
146 /// @code
147 ///
148 ///   uint8_t buffer[32];
149 ///   size_t size_bytes = sizeof(buffer);
150 ///   PW_TOKENIZE_TO_BUFFER(
151 ///       buffer, &size_bytes, "Temperature (C): %0.2f", temperature_c);
152 ///   MyProject_EnqueueMessageForUart(buffer, size_bytes);
153 ///
154 /// @endcode
155 ///
156 /// While `PW_TOKENIZE_TO_BUFFER` is very flexible, it must be passed a buffer,
157 /// which increases its code size footprint at the call site.
158 #define PW_TOKENIZE_TO_BUFFER(buffer, buffer_size_pointer, format, ...) \
159   PW_TOKENIZE_TO_BUFFER_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN,             \
160                                buffer,                                  \
161                                buffer_size_pointer,                     \
162                                format,                                  \
163                                __VA_ARGS__)
164 
165 /// Same as @c_macro{PW_TOKENIZE_TO_BUFFER}, but tokenizes to the specified
166 /// @rstref{domain <module-pw_tokenizer-domains>}.
167 #define PW_TOKENIZE_TO_BUFFER_DOMAIN(                 \
168     domain, buffer, buffer_size_pointer, format, ...) \
169   PW_TOKENIZE_TO_BUFFER_MASK(                         \
170       domain, UINT32_MAX, buffer, buffer_size_pointer, format, __VA_ARGS__)
171 
172 /// Same as @c_macro{PW_TOKENIZE_TO_BUFFER_DOMAIN}, but applies a
173 /// @rstref{bit mask <module-pw_tokenizer-masks>} to the token.
174 #define PW_TOKENIZE_TO_BUFFER_MASK(                                          \
175     domain, mask, buffer, buffer_size_pointer, format, ...)                  \
176   do {                                                                       \
177     PW_TOKENIZE_FORMAT_STRING(domain, mask, format, __VA_ARGS__);            \
178     _pw_tokenizer_ToBuffer(buffer,                                           \
179                            buffer_size_pointer,                              \
180                            PW_TOKENIZER_REPLACE_FORMAT_STRING(__VA_ARGS__)); \
181   } while (0)
182 
183 /// @brief Low-level macro for calling functions that handle tokenized strings.
184 ///
185 /// Functions that work with tokenized format strings must take the following
186 /// arguments:
187 ///
188 /// - The 32-bit token (@cpp_type{pw_tokenizer_Token})
189 /// - The 32- or 64-bit argument types (@cpp_type{pw_tokenizer_ArgTypes})
190 /// - Variadic arguments, if any
191 ///
192 /// This macro expands to those arguments. Custom tokenization macros should use
193 /// this macro to pass these arguments to a function or other macro.
194 ///
195 /** @code{cpp}
196  *    EncodeMyTokenizedString(uint32_t token,
197  *                            pw_tokenizer_ArgTypes arg_types,
198  *                            ...);
199  *
200  *    #define CUSTOM_TOKENIZATION_MACRO(format, ...)                  \
201  *      PW_TOKENIZE_FORMAT_STRING(domain, mask, format, __VA_ARGS__); \
202  *      EncodeMyTokenizedString(PW_TOKENIZER_REPLACE_FORMAT_STRING(__VA_ARGS__))
203  *  @endcode
204  */
205 #define PW_TOKENIZER_REPLACE_FORMAT_STRING(...) \
206   _PW_TOKENIZER_REPLACE_FORMAT_STRING(PW_EMPTY_ARGS(__VA_ARGS__), __VA_ARGS__)
207 
208 #define _PW_TOKENIZER_REPLACE_FORMAT_STRING(empty_args, ...) \
209   _PW_CONCAT_2(_PW_TOKENIZER_REPLACE_FORMAT_STRING_, empty_args)(__VA_ARGS__)
210 
211 #define _PW_TOKENIZER_REPLACE_FORMAT_STRING_1() _pw_tokenizer_token, 0u
212 #define _PW_TOKENIZER_REPLACE_FORMAT_STRING_0(...) \
213   _pw_tokenizer_token, PW_TOKENIZER_ARG_TYPES(__VA_ARGS__), __VA_ARGS__
214 
215 /// Converts a series of arguments to a compact format that replaces the format
216 /// string literal. Evaluates to a `pw_tokenizer_ArgTypes` value.
217 ///
218 /// Depending on the size of `pw_tokenizer_ArgTypes`, the bottom 4 or 6 bits
219 /// store the number of arguments and the remaining bits store the types, two
220 /// bits per type. The arguments are not evaluated; only their types are used.
221 ///
222 /// In general, @c_macro{PW_TOKENIZER_ARG_TYPES} should not be used directly.
223 /// Instead, use @c_macro{PW_TOKENIZER_REPLACE_FORMAT_STRING}.
224 #define PW_TOKENIZER_ARG_TYPES(...) \
225   PW_DELEGATE_BY_ARG_COUNT(_PW_TOKENIZER_TYPES_, __VA_ARGS__)
226 
227 PW_EXTERN_C_START
228 
229 // These functions encode the tokenized strings. These should not be called
230 // directly. Instead, use the corresponding PW_TOKENIZE_TO_* macros above.
231 void _pw_tokenizer_ToBuffer(void* buffer,
232                             size_t* buffer_size_bytes,  // input and output arg
233                             pw_tokenizer_Token token,
234                             pw_tokenizer_ArgTypes types,
235                             ...);
236 
237 // This empty function allows the compiler to check the format string.
238 static inline void pw_tokenizer_CheckFormatString(const char* format, ...)
239     PW_PRINTF_FORMAT(1, 2);
240 
241 static inline void pw_tokenizer_CheckFormatString(const char* format, ...) {
242   (void)format;  // Intentionally unused; the function has no runtime effect.
243 }
244 
245 PW_EXTERN_C_END
246 
247 /// Tokenizes a format string with optional arguments and sets the
248 /// `_pw_tokenizer_token` variable to the token. Must be used in its own scope,
249 /// since the same variable is used in every invocation.
250 ///
251 /// The tokenized string uses the specified @rstref{tokenization domain
252 /// <module-pw_tokenizer-domains>}. Use `PW_TOKENIZER_DEFAULT_DOMAIN` for the
253 /// default. The token also may be masked; use `UINT32_MAX` to keep all bits.
254 ///
255 /// This macro checks that the printf-style format string matches the arguments
256 /// and that no more than @c_macro{PW_TOKENIZER_MAX_SUPPORTED_ARGS} are
257 /// provided. It then stores the format string in a special section, and
258 /// calculates the string's token at compile time.
259 // clang-format off
260 #define PW_TOKENIZE_FORMAT_STRING(domain, mask, format, ...)                   \
261   static_assert(                                                               \
262       PW_FUNCTION_ARG_COUNT(__VA_ARGS__) <= PW_TOKENIZER_MAX_SUPPORTED_ARGS,   \
263       "Tokenized strings cannot have more than "                               \
264       PW_STRINGIFY(PW_TOKENIZER_MAX_SUPPORTED_ARGS) " arguments; "             \
265       PW_STRINGIFY(PW_FUNCTION_ARG_COUNT(__VA_ARGS__))                         \
266       " arguments were used for " #format " (" #__VA_ARGS__ ")");              \
267   PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT(domain, mask, format, __VA_ARGS__)
268 // clang-format on
269 
270 /// Equivalent to `PW_TOKENIZE_FORMAT_STRING`, but supports any number of
271 /// arguments.
272 ///
273 /// This is a low-level macro that should rarely be used directly. It is
274 /// intended for situations when @cpp_type{pw_tokenizer_ArgTypes} is not used.
275 /// There are two situations where @cpp_type{pw_tokenizer_ArgTypes} is
276 /// unnecessary:
277 ///
278 /// - The exact format string argument types and count are fixed.
279 /// - The format string supports a variable number of arguments of only one
280 ///   type. In this case, @c_macro{PW_FUNCTION_ARG_COUNT} may be used to pass
281 ///   the argument count to the function.
282 #define PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT(domain, mask, format, ...)     \
283   if (0) { /* Do not execute to prevent double evaluation of the arguments. */ \
284     pw_tokenizer_CheckFormatString(format PW_COMMA_ARGS(__VA_ARGS__));         \
285   }                                                                            \
286                                                                                \
287   /* Tokenize the string to a pw_tokenizer_Token at compile time. */           \
288   static _PW_TOKENIZER_CONST pw_tokenizer_Token _pw_tokenizer_token =          \
289       _PW_TOKENIZER_MASK_TOKEN(mask, format);                                  \
290                                                                                \
291   PW_TOKENIZER_DEFINE_TOKEN(_pw_tokenizer_token, domain, format)
292 
293 // Creates unique names to use for tokenized string entries and linker sections.
294 #define _PW_TOKENIZER_UNIQUE(prefix) PW_CONCAT(prefix, __LINE__, _, __COUNTER__)
295 
296 #ifdef __cplusplus
297 
298 #define _PW_TOKENIZER_CONST constexpr
299 
300 /// Records the original token, domain and string directly.
301 ///
302 /// This macro is intended to be used for tokenized enum and domain support. The
303 /// values are stored as an entry in the ELF section. As a note for tokenized
304 /// enum support, the enum name should be used as the string, and the enum value
305 /// as the token.
306 #define PW_TOKENIZER_DEFINE_TOKEN(token, domain, string)                       \
307   static_assert(::pw::tokenizer::internal::ValidDomain(domain),                \
308                 "pw_tokenizer domains may only contain alphanumeric "          \
309                 "characters, underscore, or colon, and cannot start with a "   \
310                 "number; space characters are ignored");                       \
311   alignas(1) static constexpr auto _PW_TOKENIZER_SECTION _PW_TOKENIZER_UNIQUE( \
312       _pw_tokenizer_string_entry_) =                                           \
313       ::pw::tokenizer::internal::MakeEntry(token, domain, string)
314 
315 namespace pw::tokenizer {
316 
317 using Token = ::pw_tokenizer_Token;
318 inline constexpr const char* kDefaultDomain = PW_TOKENIZER_DEFAULT_DOMAIN;
319 
320 }  // namespace pw::tokenizer
321 
322 #else
323 
324 #define _PW_TOKENIZER_CONST const
325 #define _PW_ALIGNAS(alignment) __attribute__((aligned(alignment)))
326 
327 #define PW_TOKENIZER_DEFINE_TOKEN(token, domain, string) \
328   _PW_ALIGNAS(1) static const _PW_TOKENIZER_STRING_ENTRY(token, domain, string)
329 
330 #endif  // __cplusplus
331 
332 // _PW_TOKENIZER_SECTION places the tokenized strings in a special .pw_tokenizer
333 // linker section. Host-side decoding tools read the strings and tokens from
334 // this section to build a database of tokenized strings.
335 //
336 // This section should be declared as type INFO so that it is excluded from the
337 // final binary. To declare the section, as well as the .pw_tokenizer.info
338 // metadata section, add the following to the linker script's SECTIONS command:
339 //
340 //   .pw_tokenizer.info 0x0 (INFO) :
341 //   {
342 //     KEEP(*(.pw_tokenizer.info))
343 //   }
344 //
345 //   .pw_tokenizer.entries 0x0 (INFO) :
346 //   {
347 //     KEEP(*(.pw_tokenizer.entries.*))
348 //   }
349 //
350 // A linker script snippet that provides these sections is provided in the file
351 // pw_tokenizer_linker_sections.ld. This file may be directly included into
352 // existing linker scripts.
353 //
354 // The tokenized string sections can also be managed without linker script
355 // modifications, though this is not recommended. The section can be extracted
356 // and removed from the ELF with objcopy:
357 //
358 //   objcopy --only-section .pw_tokenizer.* <ORIGINAL_ELF> <OUTPUT_ELF>
359 //   objcopy --remove-section .pw_tokenizer.* <ORIGINAL_ELF>
360 //
361 // OUTPUT_ELF will be an ELF with only the tokenized strings, and the original
362 // ELF file will have the sections removed.
363 //
364 // Without the above linker script modifications, the section garbage collection
365 // option (--gc-sections) removes the tokenized string sections. To avoid
366 // editing the target linker script, a separate metadata ELF can be linked
367 // without --gc-sections to preserve the tokenized data.
368 //
369 // pw_tokenizer is intended for use with ELF files only. Mach-O files (macOS
370 // executables) do not support section names longer than 16 characters, so a
371 // short, unused section name is used on macOS.
372 #ifdef __APPLE__
373 #define _PW_TOKENIZER_SECTION \
374   PW_KEEP_IN_SECTION(PW_STRINGIFY(_PW_TOKENIZER_UNIQUE(.pw.)))
375 #else
376 #define _PW_TOKENIZER_SECTION \
377   PW_KEEP_IN_SECTION(PW_STRINGIFY(_PW_TOKENIZER_UNIQUE(.pw_tokenizer.entries.)))
378 #endif  // __APPLE__
379