1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // http://code.google.com/p/protobuf/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Here we have a hand-written lexer. At first you might ask yourself,
36 // "Hand-written text processing? Is Kenton crazy?!" Well, first of all,
37 // yes I am crazy, but that's beside the point. There are actually reasons
38 // why I ended up writing this this way.
39 //
40 // The traditional approach to lexing is to use lex to generate a lexer for
41 // you. Unfortunately, lex's output is ridiculously ugly and difficult to
42 // integrate cleanly with C++ code, especially abstract code or code meant
43 // as a library. Better parser-generators exist but would add dependencies
44 // which most users won't already have, which we'd like to avoid. (GNU flex
45 // has a C++ output option, but it's still ridiculously ugly, non-abstract,
46 // and not library-friendly.)
47 //
48 // The next approach that any good software engineer should look at is to
49 // use regular expressions. And, indeed, I did. I have code which
50 // implements this same class using regular expressions. It's about 200
51 // lines shorter. However:
52 // - Rather than error messages telling you "This string has an invalid
53 // escape sequence at line 5, column 45", you get error messages like
54 // "Parse error on line 5". Giving more precise errors requires adding
55 // a lot of code that ends up basically as complex as the hand-coded
56 // version anyway.
57 // - The regular expression to match a string literal looks like this:
58 // kString = new RE("(\"([^\"\\\\]|" // non-escaped
59 // "\\\\[abfnrtv?\"'\\\\0-7]|" // normal escape
60 // "\\\\x[0-9a-fA-F])*\"|" // hex escape
61 // "\'([^\'\\\\]|" // Also support single-quotes.
62 // "\\\\[abfnrtv?\"'\\\\0-7]|"
63 // "\\\\x[0-9a-fA-F])*\')");
64 // Verifying the correctness of this line noise is actually harder than
65 // verifying the correctness of ConsumeString(), defined below. I'm not
66 // even confident that the above is correct, after staring at it for some
67 // time.
68 // - PCRE is fast, but there's still more overhead involved than the code
69 // below.
70 // - Sadly, regular expressions are not part of the C standard library, so
71 // using them would require depending on some other library. For the
72 // open source release, this could be really annoying. Nobody likes
73 // downloading one piece of software just to find that they need to
74 // download something else to make it work, and in all likelihood
75 // people downloading Protocol Buffers will already be doing so just
76 // to make something else work. We could include a copy of PCRE with
77 // our code, but that obligates us to keep it up-to-date and just seems
78 // like a big waste just to save 200 lines of code.
79 //
80 // On a similar but unrelated note, I'm even scared to use ctype.h.
81 // Apparently functions like isalpha() are locale-dependent. So, if we used
82 // that, then if this code is being called from some program that doesn't
83 // have its locale set to "C", it would behave strangely. We can't just set
84 // the locale to "C" ourselves since we might break the calling program that
85 // way, particularly if it is multi-threaded. WTF? Someone please let me
86 // (Kenton) know if I'm missing something here...
87 //
88 // I'd love to hear about other alternatives, though, as this code isn't
89 // exactly pretty.
90
91 #include <google/protobuf/io/tokenizer.h>
92 #include <google/protobuf/io/zero_copy_stream.h>
93 #include <google/protobuf/stubs/strutil.h>
94
95 namespace google {
96 namespace protobuf {
97 namespace io {
98 namespace {
99
100 // As mentioned above, I don't trust ctype.h due to the presence of "locales".
101 // So, I have written replacement functions here. Someone please smack me if
102 // this is a bad idea or if there is some way around this.
103 //
104 // These "character classes" are designed to be used in template methods.
105 // For instance, Tokenizer::ConsumeZeroOrMore<Whitespace>() will eat
106 // whitespace.
107
108 // Note: No class is allowed to contain '\0', since this is used to mark end-
109 // of-input and is handled specially.
110
111 #define CHARACTER_CLASS(NAME, EXPRESSION) \
112 class NAME { \
113 public: \
114 static inline bool InClass(char c) { \
115 return EXPRESSION; \
116 } \
117 }
118
119 CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' ||
120 c == '\r' || c == '\v' || c == '\f');
121
122 CHARACTER_CLASS(Unprintable, c < ' ' && c > '\0');
123
124 CHARACTER_CLASS(Digit, '0' <= c && c <= '9');
125 CHARACTER_CLASS(OctalDigit, '0' <= c && c <= '7');
126 CHARACTER_CLASS(HexDigit, ('0' <= c && c <= '9') ||
127 ('a' <= c && c <= 'f') ||
128 ('A' <= c && c <= 'F'));
129
130 CHARACTER_CLASS(Letter, ('a' <= c && c <= 'z') ||
131 ('A' <= c && c <= 'Z') ||
132 (c == '_'));
133
134 CHARACTER_CLASS(Alphanumeric, ('a' <= c && c <= 'z') ||
135 ('A' <= c && c <= 'Z') ||
136 ('0' <= c && c <= '9') ||
137 (c == '_'));
138
139 CHARACTER_CLASS(Escape, c == 'a' || c == 'b' || c == 'f' || c == 'n' ||
140 c == 'r' || c == 't' || c == 'v' || c == '\\' ||
141 c == '?' || c == '\'' || c == '\"');
142
143 #undef CHARACTER_CLASS
144
145 // Given a char, interpret it as a numeric digit and return its value.
146 // This supports any number base up to 36.
DigitValue(char digit)147 inline int DigitValue(char digit) {
148 if ('0' <= digit && digit <= '9') return digit - '0';
149 if ('a' <= digit && digit <= 'z') return digit - 'a' + 10;
150 if ('A' <= digit && digit <= 'Z') return digit - 'A' + 10;
151 return -1;
152 }
153
154 // Inline because it's only used in one place.
TranslateEscape(char c)155 inline char TranslateEscape(char c) {
156 switch (c) {
157 case 'a': return '\a';
158 case 'b': return '\b';
159 case 'f': return '\f';
160 case 'n': return '\n';
161 case 'r': return '\r';
162 case 't': return '\t';
163 case 'v': return '\v';
164 case '\\': return '\\';
165 case '?': return '\?'; // Trigraphs = :(
166 case '\'': return '\'';
167 case '"': return '\"';
168
169 // We expect escape sequences to have been validated separately.
170 default: return '?';
171 }
172 }
173
174 } // anonymous namespace
175
~ErrorCollector()176 ErrorCollector::~ErrorCollector() {}
177
178 // ===================================================================
179
Tokenizer(ZeroCopyInputStream * input,ErrorCollector * error_collector)180 Tokenizer::Tokenizer(ZeroCopyInputStream* input,
181 ErrorCollector* error_collector)
182 : input_(input),
183 error_collector_(error_collector),
184 buffer_(NULL),
185 buffer_size_(0),
186 buffer_pos_(0),
187 read_error_(false),
188 line_(0),
189 column_(0),
190 token_start_(-1),
191 allow_f_after_float_(false),
192 comment_style_(CPP_COMMENT_STYLE) {
193
194 current_.line = 0;
195 current_.column = 0;
196 current_.type = TYPE_START;
197
198 Refresh();
199 }
200
~Tokenizer()201 Tokenizer::~Tokenizer() {
202 // If we had any buffer left unread, return it to the underlying stream
203 // so that someone else can read it.
204 if (buffer_size_ > buffer_pos_) {
205 input_->BackUp(buffer_size_ - buffer_pos_);
206 }
207 }
208
209 // -------------------------------------------------------------------
210 // Internal helpers.
211
NextChar()212 void Tokenizer::NextChar() {
213 // Update our line and column counters based on the character being
214 // consumed.
215 if (current_char_ == '\n') {
216 ++line_;
217 column_ = 0;
218 } else if (current_char_ == '\t') {
219 column_ += kTabWidth - column_ % kTabWidth;
220 } else {
221 ++column_;
222 }
223
224 // Advance to the next character.
225 ++buffer_pos_;
226 if (buffer_pos_ < buffer_size_) {
227 current_char_ = buffer_[buffer_pos_];
228 } else {
229 Refresh();
230 }
231 }
232
Refresh()233 void Tokenizer::Refresh() {
234 if (read_error_) {
235 current_char_ = '\0';
236 return;
237 }
238
239 // If we're in a token, append the rest of the buffer to it.
240 if (token_start_ >= 0 && token_start_ < buffer_size_) {
241 current_.text.append(buffer_ + token_start_, buffer_size_ - token_start_);
242 token_start_ = 0;
243 }
244
245 const void* data = NULL;
246 buffer_ = NULL;
247 buffer_pos_ = 0;
248 do {
249 if (!input_->Next(&data, &buffer_size_)) {
250 // end of stream (or read error)
251 buffer_size_ = 0;
252 read_error_ = true;
253 current_char_ = '\0';
254 return;
255 }
256 } while (buffer_size_ == 0);
257
258 buffer_ = static_cast<const char*>(data);
259
260 current_char_ = buffer_[0];
261 }
262
StartToken()263 inline void Tokenizer::StartToken() {
264 token_start_ = buffer_pos_;
265 current_.type = TYPE_START; // Just for the sake of initializing it.
266 current_.text.clear();
267 current_.line = line_;
268 current_.column = column_;
269 }
270
EndToken()271 inline void Tokenizer::EndToken() {
272 // Note: The if() is necessary because some STL implementations crash when
273 // you call string::append(NULL, 0), presumably because they are trying to
274 // be helpful by detecting the NULL pointer, even though there's nothing
275 // wrong with reading zero bytes from NULL.
276 if (buffer_pos_ != token_start_) {
277 current_.text.append(buffer_ + token_start_, buffer_pos_ - token_start_);
278 }
279 token_start_ = -1;
280 }
281
282 // -------------------------------------------------------------------
283 // Helper methods that consume characters.
284
285 template<typename CharacterClass>
LookingAt()286 inline bool Tokenizer::LookingAt() {
287 return CharacterClass::InClass(current_char_);
288 }
289
290 template<typename CharacterClass>
TryConsumeOne()291 inline bool Tokenizer::TryConsumeOne() {
292 if (CharacterClass::InClass(current_char_)) {
293 NextChar();
294 return true;
295 } else {
296 return false;
297 }
298 }
299
TryConsume(char c)300 inline bool Tokenizer::TryConsume(char c) {
301 if (current_char_ == c) {
302 NextChar();
303 return true;
304 } else {
305 return false;
306 }
307 }
308
309 template<typename CharacterClass>
ConsumeZeroOrMore()310 inline void Tokenizer::ConsumeZeroOrMore() {
311 while (CharacterClass::InClass(current_char_)) {
312 NextChar();
313 }
314 }
315
316 template<typename CharacterClass>
ConsumeOneOrMore(const char * error)317 inline void Tokenizer::ConsumeOneOrMore(const char* error) {
318 if (!CharacterClass::InClass(current_char_)) {
319 AddError(error);
320 } else {
321 do {
322 NextChar();
323 } while (CharacterClass::InClass(current_char_));
324 }
325 }
326
327 // -------------------------------------------------------------------
328 // Methods that read whole patterns matching certain kinds of tokens
329 // or comments.
330
ConsumeString(char delimiter)331 void Tokenizer::ConsumeString(char delimiter) {
332 while (true) {
333 switch (current_char_) {
334 case '\0':
335 case '\n': {
336 AddError("String literals cannot cross line boundaries.");
337 return;
338 }
339
340 case '\\': {
341 // An escape sequence.
342 NextChar();
343 if (TryConsumeOne<Escape>()) {
344 // Valid escape sequence.
345 } else if (TryConsumeOne<OctalDigit>()) {
346 // Possibly followed by two more octal digits, but these will
347 // just be consumed by the main loop anyway so we don't need
348 // to do so explicitly here.
349 } else if (TryConsume('x') || TryConsume('X')) {
350 if (!TryConsumeOne<HexDigit>()) {
351 AddError("Expected hex digits for escape sequence.");
352 }
353 // Possibly followed by another hex digit, but again we don't care.
354 } else {
355 AddError("Invalid escape sequence in string literal.");
356 }
357 break;
358 }
359
360 default: {
361 if (current_char_ == delimiter) {
362 NextChar();
363 return;
364 }
365 NextChar();
366 break;
367 }
368 }
369 }
370 }
371
ConsumeNumber(bool started_with_zero,bool started_with_dot)372 Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
373 bool started_with_dot) {
374 bool is_float = false;
375
376 if (started_with_zero && (TryConsume('x') || TryConsume('X'))) {
377 // A hex number (started with "0x").
378 ConsumeOneOrMore<HexDigit>("\"0x\" must be followed by hex digits.");
379
380 } else if (started_with_zero && LookingAt<Digit>()) {
381 // An octal number (had a leading zero).
382 ConsumeZeroOrMore<OctalDigit>();
383 if (LookingAt<Digit>()) {
384 AddError("Numbers starting with leading zero must be in octal.");
385 ConsumeZeroOrMore<Digit>();
386 }
387
388 } else {
389 // A decimal number.
390 if (started_with_dot) {
391 is_float = true;
392 ConsumeZeroOrMore<Digit>();
393 } else {
394 ConsumeZeroOrMore<Digit>();
395
396 if (TryConsume('.')) {
397 is_float = true;
398 ConsumeZeroOrMore<Digit>();
399 }
400 }
401
402 if (TryConsume('e') || TryConsume('E')) {
403 is_float = true;
404 TryConsume('-') || TryConsume('+');
405 ConsumeOneOrMore<Digit>("\"e\" must be followed by exponent.");
406 }
407
408 if (allow_f_after_float_ && (TryConsume('f') || TryConsume('F'))) {
409 is_float = true;
410 }
411 }
412
413 if (LookingAt<Letter>()) {
414 AddError("Need space between number and identifier.");
415 } else if (current_char_ == '.') {
416 if (is_float) {
417 AddError(
418 "Already saw decimal point or exponent; can't have another one.");
419 } else {
420 AddError("Hex and octal numbers must be integers.");
421 }
422 }
423
424 return is_float ? TYPE_FLOAT : TYPE_INTEGER;
425 }
426
ConsumeLineComment()427 void Tokenizer::ConsumeLineComment() {
428 while (current_char_ != '\0' && current_char_ != '\n') {
429 NextChar();
430 }
431 TryConsume('\n');
432 }
433
ConsumeBlockComment()434 void Tokenizer::ConsumeBlockComment() {
435 int start_line = line_;
436 int start_column = column_ - 2;
437
438 while (true) {
439 while (current_char_ != '\0' &&
440 current_char_ != '*' &&
441 current_char_ != '/') {
442 NextChar();
443 }
444
445 if (TryConsume('*') && TryConsume('/')) {
446 // End of comment.
447 break;
448 } else if (TryConsume('/') && current_char_ == '*') {
449 // Note: We didn't consume the '*' because if there is a '/' after it
450 // we want to interpret that as the end of the comment.
451 AddError(
452 "\"/*\" inside block comment. Block comments cannot be nested.");
453 } else if (current_char_ == '\0') {
454 AddError("End-of-file inside block comment.");
455 error_collector_->AddError(
456 start_line, start_column, " Comment started here.");
457 break;
458 }
459 }
460 }
461
462 // -------------------------------------------------------------------
463
Next()464 bool Tokenizer::Next() {
465 TokenType last_token_type = current_.type;
466
467 // Did we skip any characters after the last token?
468 bool skipped_stuff = false;
469
470 while (!read_error_) {
471 if (TryConsumeOne<Whitespace>()) {
472 ConsumeZeroOrMore<Whitespace>();
473
474 } else if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) {
475 // Starting a comment?
476 if (TryConsume('/')) {
477 ConsumeLineComment();
478 } else if (TryConsume('*')) {
479 ConsumeBlockComment();
480 } else {
481 // Oops, it was just a slash. Return it.
482 current_.type = TYPE_SYMBOL;
483 current_.text = "/";
484 current_.line = line_;
485 current_.column = column_ - 1;
486 return true;
487 }
488
489 } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) {
490 ConsumeLineComment();
491
492 } else if (LookingAt<Unprintable>() || current_char_ == '\0') {
493 AddError("Invalid control characters encountered in text.");
494 NextChar();
495 // Skip more unprintable characters, too. But, remember that '\0' is
496 // also what current_char_ is set to after EOF / read error. We have
497 // to be careful not to go into an infinite loop of trying to consume
498 // it, so make sure to check read_error_ explicitly before consuming
499 // '\0'.
500 while (TryConsumeOne<Unprintable>() ||
501 (!read_error_ && TryConsume('\0'))) {
502 // Ignore.
503 }
504
505 } else {
506 // Reading some sort of token.
507 StartToken();
508
509 if (TryConsumeOne<Letter>()) {
510 ConsumeZeroOrMore<Alphanumeric>();
511 current_.type = TYPE_IDENTIFIER;
512 } else if (TryConsume('0')) {
513 current_.type = ConsumeNumber(true, false);
514 } else if (TryConsume('.')) {
515 // This could be the beginning of a floating-point number, or it could
516 // just be a '.' symbol.
517
518 if (TryConsumeOne<Digit>()) {
519 // It's a floating-point number.
520 if (last_token_type == TYPE_IDENTIFIER && !skipped_stuff) {
521 // We don't accept syntax like "blah.123".
522 error_collector_->AddError(line_, column_ - 2,
523 "Need space between identifier and decimal point.");
524 }
525 current_.type = ConsumeNumber(false, true);
526 } else {
527 current_.type = TYPE_SYMBOL;
528 }
529 } else if (TryConsumeOne<Digit>()) {
530 current_.type = ConsumeNumber(false, false);
531 } else if (TryConsume('\"')) {
532 ConsumeString('\"');
533 current_.type = TYPE_STRING;
534 } else if (TryConsume('\'')) {
535 ConsumeString('\'');
536 current_.type = TYPE_STRING;
537 } else {
538 NextChar();
539 current_.type = TYPE_SYMBOL;
540 }
541
542 EndToken();
543 return true;
544 }
545
546 skipped_stuff = true;
547 }
548
549 // EOF
550 current_.type = TYPE_END;
551 current_.text.clear();
552 current_.line = line_;
553 current_.column = column_;
554 return false;
555 }
556
557 // -------------------------------------------------------------------
558 // Token-parsing helpers. Remember that these don't need to report
559 // errors since any errors should already have been reported while
560 // tokenizing. Also, these can assume that whatever text they
561 // are given is text that the tokenizer actually parsed as a token
562 // of the given type.
563
ParseInteger(const string & text,uint64 max_value,uint64 * output)564 bool Tokenizer::ParseInteger(const string& text, uint64 max_value,
565 uint64* output) {
566 // Sadly, we can't just use strtoul() since it is only 32-bit and strtoull()
567 // is non-standard. I hate the C standard library. :(
568
569 // return strtoull(text.c_str(), NULL, 0);
570
571 const char* ptr = text.c_str();
572 int base = 10;
573 if (ptr[0] == '0') {
574 if (ptr[1] == 'x' || ptr[1] == 'X') {
575 // This is hex.
576 base = 16;
577 ptr += 2;
578 } else {
579 // This is octal.
580 base = 8;
581 }
582 }
583
584 uint64 result = 0;
585 for (; *ptr != '\0'; ptr++) {
586 int digit = DigitValue(*ptr);
587 GOOGLE_LOG_IF(DFATAL, digit < 0 || digit >= base)
588 << " Tokenizer::ParseInteger() passed text that could not have been"
589 " tokenized as an integer: " << CEscape(text);
590 if (digit > max_value || result > (max_value - digit) / base) {
591 // Overflow.
592 return false;
593 }
594 result = result * base + digit;
595 }
596
597 *output = result;
598 return true;
599 }
600
ParseFloat(const string & text)601 double Tokenizer::ParseFloat(const string& text) {
602 const char* start = text.c_str();
603 char* end;
604 double result = NoLocaleStrtod(start, &end);
605
606 // "1e" is not a valid float, but if the tokenizer reads it, it will
607 // report an error but still return it as a valid token. We need to
608 // accept anything the tokenizer could possibly return, error or not.
609 if (*end == 'e' || *end == 'E') {
610 ++end;
611 if (*end == '-' || *end == '+') ++end;
612 }
613
614 // If the Tokenizer had allow_f_after_float_ enabled, the float may be
615 // suffixed with the letter 'f'.
616 if (*end == 'f' || *end == 'F') {
617 ++end;
618 }
619
620 GOOGLE_LOG_IF(DFATAL, end - start != text.size() || *start == '-')
621 << " Tokenizer::ParseFloat() passed text that could not have been"
622 " tokenized as a float: " << CEscape(text);
623 return result;
624 }
625
ParseStringAppend(const string & text,string * output)626 void Tokenizer::ParseStringAppend(const string& text, string* output) {
627 // Reminder: text[0] is always the quote character. (If text is
628 // empty, it's invalid, so we'll just return.)
629 if (text.empty()) {
630 GOOGLE_LOG(DFATAL)
631 << " Tokenizer::ParseStringAppend() passed text that could not"
632 " have been tokenized as a string: " << CEscape(text);
633 return;
634 }
635
636 output->reserve(output->size() + text.size());
637
638 // Loop through the string copying characters to "output" and
639 // interpreting escape sequences. Note that any invalid escape
640 // sequences or other errors were already reported while tokenizing.
641 // In this case we do not need to produce valid results.
642 for (const char* ptr = text.c_str() + 1; *ptr != '\0'; ptr++) {
643 if (*ptr == '\\' && ptr[1] != '\0') {
644 // An escape sequence.
645 ++ptr;
646
647 if (OctalDigit::InClass(*ptr)) {
648 // An octal escape. May one, two, or three digits.
649 int code = DigitValue(*ptr);
650 if (OctalDigit::InClass(ptr[1])) {
651 ++ptr;
652 code = code * 8 + DigitValue(*ptr);
653 }
654 if (OctalDigit::InClass(ptr[1])) {
655 ++ptr;
656 code = code * 8 + DigitValue(*ptr);
657 }
658 output->push_back(static_cast<char>(code));
659
660 } else if (*ptr == 'x') {
661 // A hex escape. May zero, one, or two digits. (The zero case
662 // will have been caught as an error earlier.)
663 int code = 0;
664 if (HexDigit::InClass(ptr[1])) {
665 ++ptr;
666 code = DigitValue(*ptr);
667 }
668 if (HexDigit::InClass(ptr[1])) {
669 ++ptr;
670 code = code * 16 + DigitValue(*ptr);
671 }
672 output->push_back(static_cast<char>(code));
673
674 } else {
675 // Some other escape code.
676 output->push_back(TranslateEscape(*ptr));
677 }
678
679 } else if (*ptr == text[0]) {
680 // Ignore quote matching the starting quote.
681 } else {
682 output->push_back(*ptr);
683 }
684 }
685
686 return;
687 }
688
689 } // namespace io
690 } // namespace protobuf
691 } // namespace google
692