1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf; 32 33 /** Provide text format escaping support for proto2 instances. */ 34 final class TextFormatEscaper { TextFormatEscaper()35 private TextFormatEscaper() {} 36 37 private interface ByteSequence { size()38 int size(); 39 byteAt(int offset)40 byte byteAt(int offset); 41 } 42 43 /** 44 * Escapes bytes in the format used in protocol buffer text format, which is the same as the 45 * format used for C string literals. All bytes that are not printable 7-bit ASCII characters are 46 * escaped, as well as backslash, single-quote, and double-quote characters. Characters for which 47 * no defined short-hand escape sequence is defined will be escaped using 3-digit octal sequences. 48 */ escapeBytes(final ByteSequence input)49 static String escapeBytes(final ByteSequence input) { 50 final StringBuilder builder = new StringBuilder(input.size()); 51 for (int i = 0; i < input.size(); i++) { 52 final byte b = input.byteAt(i); 53 switch (b) { 54 // Java does not recognize \a or \v, apparently. 55 case 0x07: 56 builder.append("\\a"); 57 break; 58 case '\b': 59 builder.append("\\b"); 60 break; 61 case '\f': 62 builder.append("\\f"); 63 break; 64 case '\n': 65 builder.append("\\n"); 66 break; 67 case '\r': 68 builder.append("\\r"); 69 break; 70 case '\t': 71 builder.append("\\t"); 72 break; 73 case 0x0b: 74 builder.append("\\v"); 75 break; 76 case '\\': 77 builder.append("\\\\"); 78 break; 79 case '\'': 80 builder.append("\\\'"); 81 break; 82 case '"': 83 builder.append("\\\""); 84 break; 85 default: 86 // Only ASCII characters between 0x20 (space) and 0x7e (tilde) are 87 // printable. Other byte values must be escaped. 88 if (b >= 0x20 && b <= 0x7e) { 89 builder.append((char) b); 90 } else { 91 builder.append('\\'); 92 builder.append((char) ('0' + ((b >>> 6) & 3))); 93 builder.append((char) ('0' + ((b >>> 3) & 7))); 94 builder.append((char) ('0' + (b & 7))); 95 } 96 break; 97 } 98 } 99 return builder.toString(); 100 } 101 102 /** 103 * Escapes bytes in the format used in protocol buffer text format, which is the same as the 104 * format used for C string literals. All bytes that are not printable 7-bit ASCII characters are 105 * escaped, as well as backslash, single-quote, and double-quote characters. Characters for which 106 * no defined short-hand escape sequence is defined will be escaped using 3-digit octal sequences. 107 */ escapeBytes(final ByteString input)108 static String escapeBytes(final ByteString input) { 109 return escapeBytes( 110 new ByteSequence() { 111 @Override 112 public int size() { 113 return input.size(); 114 } 115 116 @Override 117 public byte byteAt(int offset) { 118 return input.byteAt(offset); 119 } 120 }); 121 } 122 123 /** Like {@link #escapeBytes(ByteString)}, but used for byte array. */ 124 static String escapeBytes(final byte[] input) { 125 return escapeBytes( 126 new ByteSequence() { 127 @Override 128 public int size() { 129 return input.length; 130 } 131 132 @Override 133 public byte byteAt(int offset) { 134 return input[offset]; 135 } 136 }); 137 } 138 139 /** 140 * Like {@link #escapeBytes(ByteString)}, but escapes a text string. Non-ASCII characters are 141 * first encoded as UTF-8, then each byte is escaped individually as a 3-digit octal escape. Yes, 142 * it's weird. 143 */ 144 static String escapeText(final String input) { 145 return escapeBytes(ByteString.copyFromUtf8(input)); 146 } 147 148 /** Escape double quotes and backslashes in a String for unicode output of a message. */ 149 static String escapeDoubleQuotesAndBackslashes(final String input) { 150 return input.replace("\\", "\\\\").replace("\"", "\\\""); 151 } 152 } 153