1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "google/protobuf/compiler/objectivec/tf_decode_data.h"
9
10 #include <cstdint>
11 #include <sstream>
12 #include <string>
13 #include <vector>
14
15 #include "absl/log/absl_check.h"
16 #include "absl/strings/ascii.h"
17 #include "absl/strings/escaping.h"
18 #include "absl/strings/match.h"
19 #include "google/protobuf/io/coded_stream.h"
20 #include "google/protobuf/io/zero_copy_stream_impl.h"
21
22 // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some
23 // error cases, so it seems to be ok to use as a back door for errors.
24
25 namespace google {
26 namespace protobuf {
27 namespace compiler {
28 namespace objectivec {
29
30 namespace {
31
32 // Helper to build up the decode data for a string.
33 class DecodeDataBuilder {
34 public:
DecodeDataBuilder()35 DecodeDataBuilder() { Reset(); }
36
37 bool AddCharacter(char desired, char input);
AddUnderscore()38 void AddUnderscore() {
39 Push();
40 need_underscore_ = true;
41 }
Finish()42 std::string Finish() {
43 Push();
44 return decode_data_;
45 }
46
47 private:
48 static constexpr uint8_t kAddUnderscore = 0x80;
49
50 static constexpr uint8_t kOpAsIs = 0x00;
51 static constexpr uint8_t kOpFirstUpper = 0x40;
52 static constexpr uint8_t kOpFirstLower = 0x20;
53 static constexpr uint8_t kOpAllUpper = 0x60;
54
55 static constexpr int kMaxSegmentLen = 0x1f;
56
AddChar(const char desired)57 void AddChar(const char desired) {
58 ++segment_len_;
59 is_all_upper_ &= absl::ascii_isupper(desired);
60 }
61
Push()62 void Push() {
63 uint8_t op = (op_ | segment_len_);
64 if (need_underscore_) op |= kAddUnderscore;
65 if (op != 0) {
66 decode_data_ += (char)op;
67 }
68 Reset();
69 }
70
AddFirst(const char desired,const char input)71 bool AddFirst(const char desired, const char input) {
72 if (desired == input) {
73 op_ = kOpAsIs;
74 } else if (desired == absl::ascii_toupper(input)) {
75 op_ = kOpFirstUpper;
76 } else if (desired == absl::ascii_tolower(input)) {
77 op_ = kOpFirstLower;
78 } else {
79 // Can't be transformed to match.
80 return false;
81 }
82 AddChar(desired);
83 return true;
84 }
85
Reset()86 void Reset() {
87 need_underscore_ = false;
88 op_ = 0;
89 segment_len_ = 0;
90 is_all_upper_ = true;
91 }
92
93 bool need_underscore_;
94 bool is_all_upper_;
95 uint8_t op_;
96 int segment_len_;
97
98 std::string decode_data_;
99 };
100
AddCharacter(char desired,char input)101 bool DecodeDataBuilder::AddCharacter(char desired, char input) {
102 // If we've hit the max size, push to start a new segment.
103 if (segment_len_ == kMaxSegmentLen) {
104 Push();
105 }
106 if (segment_len_ == 0) {
107 return AddFirst(desired, input);
108 }
109
110 // Desired and input match...
111 if (desired == input) {
112 // If we aren't transforming it, or we're upper casing it and it is
113 // supposed to be uppercase; just add it to the segment.
114 if ((op_ != kOpAllUpper) || absl::ascii_isupper(desired)) {
115 AddChar(desired);
116 return true;
117 }
118
119 // Add the current segment, and start the next one.
120 Push();
121 return AddFirst(desired, input);
122 }
123
124 // If we need to uppercase, and everything so far has been uppercase,
125 // promote op to AllUpper.
126 if ((desired == absl::ascii_toupper(input)) && is_all_upper_) {
127 op_ = kOpAllUpper;
128 AddChar(desired);
129 return true;
130 }
131
132 // Give up, push and start a new segment.
133 Push();
134 return AddFirst(desired, input);
135 }
136
// Fallback used when decode data can't be generated: returns `str` wrapped in
// NUL bytes, the leading one marking a full (raw) string and the trailing one
// ending it.
std::string DirectDecodeString(const std::string& str) {
  std::string wrapped(1, '\0');  // Marker for full string.
  wrapped.append(str);
  wrapped.push_back('\0');  // End of string.
  return wrapped;
}
146
147 } // namespace
148
AddString(int32_t key,const std::string & input_for_decode,const std::string & desired_output)149 void TextFormatDecodeData::AddString(int32_t key,
150 const std::string& input_for_decode,
151 const std::string& desired_output) {
152 for (std::vector<DataEntry>::const_iterator i = entries_.begin();
153 i != entries_.end(); ++i) {
154 ABSL_CHECK(i->first != key)
155 << "error: duplicate key (" << key
156 << ") making TextFormat data, input: \"" << input_for_decode
157 << "\", desired: \"" << desired_output << "\".";
158 }
159
160 const std::string& data = TextFormatDecodeData::DecodeDataForString(
161 input_for_decode, desired_output);
162 entries_.push_back(DataEntry(key, data));
163 }
164
Data() const165 std::string TextFormatDecodeData::Data() const {
166 std::ostringstream data_stringstream;
167
168 if (num_entries() > 0) {
169 io::OstreamOutputStream data_outputstream(&data_stringstream);
170 io::CodedOutputStream output_stream(&data_outputstream);
171
172 output_stream.WriteVarint32(num_entries());
173 for (std::vector<DataEntry>::const_iterator i = entries_.begin();
174 i != entries_.end(); ++i) {
175 output_stream.WriteVarint32(i->first);
176 output_stream.WriteString(i->second);
177 }
178 }
179
180 data_stringstream.flush();
181 return data_stringstream.str();
182 }
183
184 // static
DecodeDataForString(const std::string & input_for_decode,const std::string & desired_output)185 std::string TextFormatDecodeData::DecodeDataForString(
186 const std::string& input_for_decode, const std::string& desired_output) {
187 ABSL_CHECK(!input_for_decode.empty() && !desired_output.empty())
188 << "error: got empty string for making TextFormat data, input: \""
189 << input_for_decode << "\", desired: \"" << desired_output << "\".";
190 ABSL_CHECK(!absl::StrContains(input_for_decode, '\0') &&
191 !absl::StrContains(desired_output, '\0'))
192 << "error: got a null char in a string for making TextFormat data,"
193 << " input: \"" << absl::CEscape(input_for_decode) << "\", desired: \""
194 << absl::CEscape(desired_output) << "\".";
195
196 DecodeDataBuilder builder;
197
198 // Walk the output building it from the input.
199 int x = 0;
200 for (int y = 0; y < desired_output.size(); y++) {
201 const char d = desired_output[y];
202 if (d == '_') {
203 builder.AddUnderscore();
204 continue;
205 }
206
207 if (x >= input_for_decode.size()) {
208 // Out of input, no way to encode it, just return a full decode.
209 return DirectDecodeString(desired_output);
210 }
211 if (builder.AddCharacter(d, input_for_decode[x])) {
212 ++x; // Consumed one input
213 } else {
214 // Couldn't transform for the next character, just return a full decode.
215 return DirectDecodeString(desired_output);
216 }
217 }
218
219 if (x != input_for_decode.size()) {
220 // Extra input (suffix from name sanitizing?), just return a full decode.
221 return DirectDecodeString(desired_output);
222 }
223
224 // Add the end marker.
225 return builder.Finish() + (char)'\0';
226 }
227
228 } // namespace objectivec
229 } // namespace compiler
230 } // namespace protobuf
231 } // namespace google
232