• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "google/protobuf/compiler/objectivec/tf_decode_data.h"
9 
10 #include <cstdint>
11 #include <sstream>
12 #include <string>
13 #include <vector>
14 
15 #include "absl/log/absl_check.h"
16 #include "absl/strings/ascii.h"
17 #include "absl/strings/escaping.h"
18 #include "absl/strings/match.h"
19 #include "google/protobuf/io/coded_stream.h"
20 #include "google/protobuf/io/zero_copy_stream_impl.h"
21 
22 // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some
23 // error cases, so it seems to be ok to use as a back door for errors.
24 
25 namespace google {
26 namespace protobuf {
27 namespace compiler {
28 namespace objectivec {
29 
30 namespace {
31 
32 // Helper to build up the decode data for a string.
33 class DecodeDataBuilder {
34  public:
DecodeDataBuilder()35   DecodeDataBuilder() { Reset(); }
36 
37   bool AddCharacter(char desired, char input);
AddUnderscore()38   void AddUnderscore() {
39     Push();
40     need_underscore_ = true;
41   }
Finish()42   std::string Finish() {
43     Push();
44     return decode_data_;
45   }
46 
47  private:
48   static constexpr uint8_t kAddUnderscore = 0x80;
49 
50   static constexpr uint8_t kOpAsIs = 0x00;
51   static constexpr uint8_t kOpFirstUpper = 0x40;
52   static constexpr uint8_t kOpFirstLower = 0x20;
53   static constexpr uint8_t kOpAllUpper = 0x60;
54 
55   static constexpr int kMaxSegmentLen = 0x1f;
56 
AddChar(const char desired)57   void AddChar(const char desired) {
58     ++segment_len_;
59     is_all_upper_ &= absl::ascii_isupper(desired);
60   }
61 
Push()62   void Push() {
63     uint8_t op = (op_ | segment_len_);
64     if (need_underscore_) op |= kAddUnderscore;
65     if (op != 0) {
66       decode_data_ += (char)op;
67     }
68     Reset();
69   }
70 
AddFirst(const char desired,const char input)71   bool AddFirst(const char desired, const char input) {
72     if (desired == input) {
73       op_ = kOpAsIs;
74     } else if (desired == absl::ascii_toupper(input)) {
75       op_ = kOpFirstUpper;
76     } else if (desired == absl::ascii_tolower(input)) {
77       op_ = kOpFirstLower;
78     } else {
79       // Can't be transformed to match.
80       return false;
81     }
82     AddChar(desired);
83     return true;
84   }
85 
Reset()86   void Reset() {
87     need_underscore_ = false;
88     op_ = 0;
89     segment_len_ = 0;
90     is_all_upper_ = true;
91   }
92 
93   bool need_underscore_;
94   bool is_all_upper_;
95   uint8_t op_;
96   int segment_len_;
97 
98   std::string decode_data_;
99 };
100 
AddCharacter(char desired,char input)101 bool DecodeDataBuilder::AddCharacter(char desired, char input) {
102   // If we've hit the max size, push to start a new segment.
103   if (segment_len_ == kMaxSegmentLen) {
104     Push();
105   }
106   if (segment_len_ == 0) {
107     return AddFirst(desired, input);
108   }
109 
110   // Desired and input match...
111   if (desired == input) {
112     // If we aren't transforming it, or we're upper casing it and it is
113     // supposed to be uppercase; just add it to the segment.
114     if ((op_ != kOpAllUpper) || absl::ascii_isupper(desired)) {
115       AddChar(desired);
116       return true;
117     }
118 
119     // Add the current segment, and start the next one.
120     Push();
121     return AddFirst(desired, input);
122   }
123 
124   // If we need to uppercase, and everything so far has been uppercase,
125   // promote op to AllUpper.
126   if ((desired == absl::ascii_toupper(input)) && is_all_upper_) {
127     op_ = kOpAllUpper;
128     AddChar(desired);
129     return true;
130   }
131 
132   // Give up, push and start a new segment.
133   Push();
134   return AddFirst(desired, input);
135 }
136 
// Fallback used when decode data can't be generated: wraps the raw string in
// a directive consisting of a leading NUL (the "full string" marker), the
// string itself, and a trailing NUL (the end of the string).
std::string DirectDecodeString(const std::string& str) {
  std::string directive(1, '\0');  // Marker for full string.
  directive.append(str);
  directive.push_back('\0');  // End of string.
  return directive;
}
146 
147 }  // namespace
148 
AddString(int32_t key,const std::string & input_for_decode,const std::string & desired_output)149 void TextFormatDecodeData::AddString(int32_t key,
150                                      const std::string& input_for_decode,
151                                      const std::string& desired_output) {
152   for (std::vector<DataEntry>::const_iterator i = entries_.begin();
153        i != entries_.end(); ++i) {
154     ABSL_CHECK(i->first != key)
155         << "error: duplicate key (" << key
156         << ") making TextFormat data, input: \"" << input_for_decode
157         << "\", desired: \"" << desired_output << "\".";
158   }
159 
160   const std::string& data = TextFormatDecodeData::DecodeDataForString(
161       input_for_decode, desired_output);
162   entries_.push_back(DataEntry(key, data));
163 }
164 
Data() const165 std::string TextFormatDecodeData::Data() const {
166   std::ostringstream data_stringstream;
167 
168   if (num_entries() > 0) {
169     io::OstreamOutputStream data_outputstream(&data_stringstream);
170     io::CodedOutputStream output_stream(&data_outputstream);
171 
172     output_stream.WriteVarint32(num_entries());
173     for (std::vector<DataEntry>::const_iterator i = entries_.begin();
174          i != entries_.end(); ++i) {
175       output_stream.WriteVarint32(i->first);
176       output_stream.WriteString(i->second);
177     }
178   }
179 
180   data_stringstream.flush();
181   return data_stringstream.str();
182 }
183 
184 // static
DecodeDataForString(const std::string & input_for_decode,const std::string & desired_output)185 std::string TextFormatDecodeData::DecodeDataForString(
186     const std::string& input_for_decode, const std::string& desired_output) {
187   ABSL_CHECK(!input_for_decode.empty() && !desired_output.empty())
188       << "error: got empty string for making TextFormat data, input: \""
189       << input_for_decode << "\", desired: \"" << desired_output << "\".";
190   ABSL_CHECK(!absl::StrContains(input_for_decode, '\0') &&
191              !absl::StrContains(desired_output, '\0'))
192       << "error: got a null char in a string for making TextFormat data,"
193       << " input: \"" << absl::CEscape(input_for_decode) << "\", desired: \""
194       << absl::CEscape(desired_output) << "\".";
195 
196   DecodeDataBuilder builder;
197 
198   // Walk the output building it from the input.
199   int x = 0;
200   for (int y = 0; y < desired_output.size(); y++) {
201     const char d = desired_output[y];
202     if (d == '_') {
203       builder.AddUnderscore();
204       continue;
205     }
206 
207     if (x >= input_for_decode.size()) {
208       // Out of input, no way to encode it, just return a full decode.
209       return DirectDecodeString(desired_output);
210     }
211     if (builder.AddCharacter(d, input_for_decode[x])) {
212       ++x;  // Consumed one input
213     } else {
214       // Couldn't transform for the next character, just return a full decode.
215       return DirectDecodeString(desired_output);
216     }
217   }
218 
219   if (x != input_for_decode.size()) {
220     // Extra input (suffix from name sanitizing?), just return a full decode.
221     return DirectDecodeString(desired_output);
222   }
223 
224   // Add the end marker.
225   return builder.Finish() + (char)'\0';
226 }
227 
228 }  // namespace objectivec
229 }  // namespace compiler
230 }  // namespace protobuf
231 }  // namespace google
232