• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 #include "pw_tokenizer/detokenize.h"
16 
17 #include <algorithm>
18 #include <cstring>
19 
20 #include "pw_bytes/bit.h"
21 #include "pw_bytes/endian.h"
22 #include "pw_tokenizer/internal/decode.h"
23 
24 namespace pw::tokenizer {
25 namespace {
26 
UnknownTokenMessage(uint32_t value)27 std::string UnknownTokenMessage(uint32_t value) {
28   std::string output(PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX "unknown token ");
29 
30   // Output a hexadecimal version of the token.
31   for (int shift = 28; shift >= 0; shift -= 4) {
32     output.push_back("0123456789abcdef"[(value >> shift) & 0xF]);
33   }
34 
35   output.append(PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX);
36   return output;
37 }
38 
39 // Decoding result paired with its entry's date_removed value, for sorting.
40 using DecodingResult = std::pair<DecodedFormatString, uint32_t>;
41 
42 // Determines if one result is better than the other if collisions occurred.
43 // Returns true if lhs is preferred over rhs. This logic should match the
44 // collision resolution logic in detokenize.py.
IsBetterResult(const DecodingResult & lhs,const DecodingResult & rhs)45 bool IsBetterResult(const DecodingResult& lhs, const DecodingResult& rhs) {
46   // Favor the result for which decoding succeeded.
47   if (lhs.first.ok() != rhs.first.ok()) {
48     return lhs.first.ok();
49   }
50 
51   // Favor the result for which all bytes were decoded.
52   if ((lhs.first.remaining_bytes() == 0u) !=
53       (rhs.first.remaining_bytes() == 0u)) {
54     return lhs.first.remaining_bytes() == 0u;
55   }
56 
57   // Favor the result with fewer decoding errors.
58   if (lhs.first.decoding_errors() != rhs.first.decoding_errors()) {
59     return lhs.first.decoding_errors() < rhs.first.decoding_errors();
60   }
61 
62   // Favor the result that successfully decoded the most arguments.
63   if (lhs.first.argument_count() != rhs.first.argument_count()) {
64     return lhs.first.argument_count() > rhs.first.argument_count();
65   }
66 
67   // Favor the result that was removed from the database most recently.
68   return lhs.second > rhs.second;
69 }
70 
71 }  // namespace
72 
DetokenizedString(uint32_t token,const span<const TokenizedStringEntry> & entries,const span<const uint8_t> & arguments)73 DetokenizedString::DetokenizedString(
74     uint32_t token,
75     const span<const TokenizedStringEntry>& entries,
76     const span<const uint8_t>& arguments)
77     : token_(token), has_token_(true) {
78   std::vector<DecodingResult> results;
79 
80   for (const auto& [format, date_removed] : entries) {
81     results.push_back(DecodingResult{format.Format(arguments), date_removed});
82   }
83 
84   std::sort(results.begin(), results.end(), IsBetterResult);
85 
86   for (auto& result : results) {
87     matches_.push_back(std::move(result.first));
88   }
89 }
90 
BestString() const91 std::string DetokenizedString::BestString() const {
92   return matches_.empty() ? std::string() : matches_[0].value();
93 }
94 
BestStringWithErrors() const95 std::string DetokenizedString::BestStringWithErrors() const {
96   if (matches_.empty()) {
97     return has_token_ ? UnknownTokenMessage(token_)
98                       : PW_TOKENIZER_ARG_DECODING_ERROR("missing token");
99   }
100   return matches_[0].value_with_errors();
101 }
102 
Detokenizer(const TokenDatabase & database)103 Detokenizer::Detokenizer(const TokenDatabase& database) {
104   for (const auto& entry : database) {
105     database_[entry.token].emplace_back(entry.string, entry.date_removed);
106   }
107 }
108 
Detokenize(const span<const uint8_t> & encoded) const109 DetokenizedString Detokenizer::Detokenize(
110     const span<const uint8_t>& encoded) const {
111   // The token is missing from the encoded data; there is nothing to do.
112   if (encoded.empty()) {
113     return DetokenizedString();
114   }
115 
116   uint32_t token = bytes::ReadInOrder<uint32_t>(
117       endian::little, encoded.data(), encoded.size());
118 
119   const auto result = database_.find(token);
120 
121   return DetokenizedString(
122       token,
123       result == database_.end() ? span<TokenizedStringEntry>()
124                                 : span(result->second),
125       encoded.size() < sizeof(token) ? span<const uint8_t>()
126                                      : encoded.subspan(sizeof(token)));
127 }
128 
129 }  // namespace pw::tokenizer
130