1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf; 32 33 /** 34 * Provide text format escaping support for proto2 instances. 35 */ 36 final class TextFormatEscaper { TextFormatEscaper()37 private TextFormatEscaper() {} 38 39 private interface ByteSequence { size()40 int size(); byteAt(int offset)41 byte byteAt(int offset); 42 } 43 44 /** 45 * Escapes bytes in the format used in protocol buffer text format, which 46 * is the same as the format used for C string literals. All bytes 47 * that are not printable 7-bit ASCII characters are escaped, as well as 48 * backslash, single-quote, and double-quote characters. Characters for 49 * which no defined short-hand escape sequence is defined will be escaped 50 * using 3-digit octal sequences. 51 */ escapeBytes(final ByteSequence input)52 static String escapeBytes(final ByteSequence input) { 53 final StringBuilder builder = new StringBuilder(input.size()); 54 for (int i = 0; i < input.size(); i++) { 55 final byte b = input.byteAt(i); 56 switch (b) { 57 // Java does not recognize \a or \v, apparently. 58 case 0x07: builder.append("\\a"); break; 59 case '\b': builder.append("\\b"); break; 60 case '\f': builder.append("\\f"); break; 61 case '\n': builder.append("\\n"); break; 62 case '\r': builder.append("\\r"); break; 63 case '\t': builder.append("\\t"); break; 64 case 0x0b: builder.append("\\v"); break; 65 case '\\': builder.append("\\\\"); break; 66 case '\'': builder.append("\\\'"); break; 67 case '"' : builder.append("\\\""); break; 68 default: 69 // Only ASCII characters between 0x20 (space) and 0x7e (tilde) are 70 // printable. Other byte values must be escaped. 71 if (b >= 0x20 && b <= 0x7e) { 72 builder.append((char) b); 73 } else { 74 builder.append('\\'); 75 builder.append((char) ('0' + ((b >>> 6) & 3))); 76 builder.append((char) ('0' + ((b >>> 3) & 7))); 77 builder.append((char) ('0' + (b & 7))); 78 } 79 break; 80 } 81 } 82 return builder.toString(); 83 } 84 85 /** 86 * Escapes bytes in the format used in protocol buffer text format, which 87 * is the same as the format used for C string literals. All bytes 88 * that are not printable 7-bit ASCII characters are escaped, as well as 89 * backslash, single-quote, and double-quote characters. Characters for 90 * which no defined short-hand escape sequence is defined will be escaped 91 * using 3-digit octal sequences. 92 */ escapeBytes(final ByteString input)93 static String escapeBytes(final ByteString input) { 94 return escapeBytes(new ByteSequence() { 95 @Override 96 public int size() { 97 return input.size(); 98 } 99 @Override 100 public byte byteAt(int offset) { 101 return input.byteAt(offset); 102 } 103 }); 104 } 105 106 /** 107 * Like {@link #escapeBytes(ByteString)}, but used for byte array. 108 */ 109 static String escapeBytes(final byte[] input) { 110 return escapeBytes(new ByteSequence() { 111 @Override 112 public int size() { 113 return input.length; 114 } 115 @Override 116 public byte byteAt(int offset) { 117 return input[offset]; 118 } 119 }); 120 } 121 122 /** 123 * Like {@link #escapeBytes(ByteString)}, but escapes a text string. 124 * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped 125 * individually as a 3-digit octal escape. Yes, it's weird. 126 */ 127 static String escapeText(final String input) { 128 return escapeBytes(ByteString.copyFromUtf8(input)); 129 } 130 131 /** 132 * Escape double quotes and backslashes in a String for unicode output of a message. 133 */ 134 static String escapeDoubleQuotesAndBackslashes(final String input) { 135 return input.replace("\\", "\\\\").replace("\"", "\\\""); 136 } 137 } 138