1 /* Copyright 2015 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5 */ 6 7 package org.brotli.dec; 8 9 import java.nio.ByteBuffer; 10 11 /** 12 * Transformations on dictionary words. 13 * 14 * Transform descriptor is a triplet: {prefix, operator, suffix}. 15 * "prefix" and "suffix" are short strings inserted before and after transformed dictionary word. 16 * "operator" is applied to dictionary word itself. 17 * 18 * Some operators has "built-in" parameters, i.e. parameter is defined by operator ordinal. Other 19 * operators have "external" parameters, supplied via additional table encoded in shared dictionary. 20 * 21 * Operators: 22 * - IDENTITY (0): dictionary word is inserted "as is" 23 * - OMIT_LAST_N (1 - 9): last N octets of dictionary word are not inserted; N == ordinal 24 * - OMIT_FIRST_M (12-20): first M octets of dictionary word are not inserted; M == ordinal - 11 25 * - UPPERCASE_FIRST (10): first "scalar" is XOR'ed with number 32 26 * - UPPERCASE_ALL (11): all "scalars" are XOR'ed with number 32 27 * - SHIFT_FIRST (21): first "scalar" is shifted by number form parameter table 28 * - SHIFT_ALL (22): all "scalar" is shifted by number form parameter table 29 * 30 * Here "scalar" is a variable length character coding similar to UTF-8 encoding. 31 * UPPERCASE_XXX / SHIFT_XXX operators were designed to change the case of UTF-8 encoded characters. 32 * While UPPERCASE_XXX works well only on ASCII charset, SHIFT is much more generic and could be 33 * used for most (all?) alphabets. 34 */ 35 final class Transform { 36 37 static final class Transforms { 38 final int numTransforms; 39 final int[] triplets; 40 final byte[] prefixSuffixStorage; 41 final int[] prefixSuffixHeads; 42 final short[] params; 43 Transforms(int numTransforms, int prefixSuffixLen, int prefixSuffixCount)44 Transforms(int numTransforms, int prefixSuffixLen, int prefixSuffixCount) { 45 this.numTransforms = numTransforms; 46 this.triplets = new int[numTransforms * 3]; 47 this.params = new short[numTransforms]; 48 this.prefixSuffixStorage = new byte[prefixSuffixLen]; 49 this.prefixSuffixHeads = new int[prefixSuffixCount + 1]; 50 } 51 } 52 53 static final int NUM_RFC_TRANSFORMS = 121; 54 static final Transforms RFC_TRANSFORMS = new Transforms(NUM_RFC_TRANSFORMS, 167, 50); 55 56 private static final int OMIT_FIRST_LAST_LIMIT = 9; 57 58 private static final int IDENTITY = 0; 59 private static final int OMIT_LAST_BASE = IDENTITY + 1 - 1; // there is no OMIT_LAST_0. 60 private static final int UPPERCASE_FIRST = OMIT_LAST_BASE + OMIT_FIRST_LAST_LIMIT + 1; 61 private static final int UPPERCASE_ALL = UPPERCASE_FIRST + 1; 62 private static final int OMIT_FIRST_BASE = UPPERCASE_ALL + 1 - 1; // there is no OMIT_FIRST_0. 63 private static final int SHIFT_FIRST = OMIT_FIRST_BASE + OMIT_FIRST_LAST_LIMIT + 1; 64 private static final int SHIFT_ALL = SHIFT_FIRST + 1; 65 66 // Bundle of 0-terminated strings. 67 private static final String PREFIX_SUFFIX_SRC = "# #s #, #e #.# the #.com/#\u00C2\u00A0# of # and" 68 + " # in # to #\"#\">#\n#]# for # a # that #. # with #'# from # by #. The # on # as # is #ing" 69 + " #\n\t#:#ed #(# at #ly #=\"# of the #. This #,# not #er #al #='#ful #ive #less #est #ize #" 70 + "ous #"; 71 private static final String TRANSFORMS_SRC = " !! ! , *! &! \" ! ) * * - ! # ! #!*! " 72 + "+ ,$ ! - % . / # 0 1 . \" 2 3!* 4% ! # / 5 6 7 8 0 1 & $ 9 + : " 73 + " ; < ' != > ?! 4 @ 4 2 & A *# ( B C& ) % ) !*# *-% A +! *. D! %' & E *6 F " 74 + " G% ! *A *% H! D I!+! J!+ K +- *4! A L!*4 M N +6 O!*% +.! K *G P +%( ! G *D +D " 75 + " Q +# *K!*G!+D!+# +G +A +4!+% +K!+4!*D!+K!*K"; 76 unpackTransforms(byte[] prefixSuffix, int[] prefixSuffixHeads, int[] transforms, String prefixSuffixSrc, String transformsSrc)77 private static void unpackTransforms(byte[] prefixSuffix, 78 int[] prefixSuffixHeads, int[] transforms, String prefixSuffixSrc, String transformsSrc) { 79 int n = prefixSuffixSrc.length(); 80 int index = 1; 81 int j = 0; 82 for (int i = 0; i < n; ++i) { 83 char c = prefixSuffixSrc.charAt(i); 84 if (c == 35) { // == # 85 prefixSuffixHeads[index++] = j; 86 } else { 87 prefixSuffix[j++] = (byte) c; 88 } 89 } 90 91 for (int i = 0; i < NUM_RFC_TRANSFORMS * 3; ++i) { 92 transforms[i] = transformsSrc.charAt(i) - 32; 93 } 94 } 95 96 static { unpackTransforms(RFC_TRANSFORMS.prefixSuffixStorage, RFC_TRANSFORMS.prefixSuffixHeads, RFC_TRANSFORMS.triplets, PREFIX_SUFFIX_SRC, TRANSFORMS_SRC)97 unpackTransforms(RFC_TRANSFORMS.prefixSuffixStorage, RFC_TRANSFORMS.prefixSuffixHeads, 98 RFC_TRANSFORMS.triplets, PREFIX_SUFFIX_SRC, TRANSFORMS_SRC); 99 } 100 transformDictionaryWord(byte[] dst, int dstOffset, ByteBuffer src, int srcOffset, int len, Transforms transforms, int transformIndex)101 static int transformDictionaryWord(byte[] dst, int dstOffset, ByteBuffer src, int srcOffset, 102 int len, Transforms transforms, int transformIndex) { 103 int offset = dstOffset; 104 int[] triplets = transforms.triplets; 105 byte[] prefixSuffixStorage = transforms.prefixSuffixStorage; 106 int[] prefixSuffixHeads = transforms.prefixSuffixHeads; 107 int transformOffset = 3 * transformIndex; 108 int prefixIdx = triplets[transformOffset]; 109 int transformType = triplets[transformOffset + 1]; 110 int suffixIdx = triplets[transformOffset + 2]; 111 int prefix = prefixSuffixHeads[prefixIdx]; 112 int prefixEnd = prefixSuffixHeads[prefixIdx + 1]; 113 int suffix = prefixSuffixHeads[suffixIdx]; 114 int suffixEnd = prefixSuffixHeads[suffixIdx + 1]; 115 116 int omitFirst = transformType - OMIT_FIRST_BASE; 117 int omitLast = transformType - OMIT_LAST_BASE; 118 if (omitFirst < 1 || omitFirst > OMIT_FIRST_LAST_LIMIT) { 119 omitFirst = 0; 120 } 121 if (omitLast < 1 || omitLast > OMIT_FIRST_LAST_LIMIT) { 122 omitLast = 0; 123 } 124 125 // Copy prefix. 126 while (prefix != prefixEnd) { 127 dst[offset++] = prefixSuffixStorage[prefix++]; 128 } 129 130 // Copy trimmed word. 131 if (omitFirst > len) { 132 omitFirst = len; 133 } 134 srcOffset += omitFirst; 135 len -= omitFirst; 136 len -= omitLast; 137 int i = len; 138 while (i > 0) { 139 dst[offset++] = src.get(srcOffset++); 140 i--; 141 } 142 143 // Ferment. 144 if (transformType == UPPERCASE_FIRST || transformType == UPPERCASE_ALL) { 145 int uppercaseOffset = offset - len; 146 if (transformType == UPPERCASE_FIRST) { 147 len = 1; 148 } 149 while (len > 0) { 150 int c0 = dst[uppercaseOffset] & 0xFF; 151 if (c0 < 0xC0) { 152 if (c0 >= 97 && c0 <= 122) { // in [a..z] range 153 dst[uppercaseOffset] ^= (byte) 32; 154 } 155 uppercaseOffset += 1; 156 len -= 1; 157 } else if (c0 < 0xE0) { 158 dst[uppercaseOffset + 1] ^= (byte) 32; 159 uppercaseOffset += 2; 160 len -= 2; 161 } else { 162 dst[uppercaseOffset + 2] ^= (byte) 5; 163 uppercaseOffset += 3; 164 len -= 3; 165 } 166 } 167 } else if (transformType == SHIFT_FIRST || transformType == SHIFT_ALL) { 168 int shiftOffset = offset - len; 169 short param = transforms.params[transformIndex]; 170 /* Limited sign extension: scalar < (1 << 24). */ 171 int scalar = (param & 0x7FFF) + (0x1000000 - (param & 0x8000)); 172 while (len > 0) { 173 int step = 1; 174 int c0 = dst[shiftOffset] & 0xFF; 175 if (c0 < 0x80) { 176 /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */ 177 scalar += c0; 178 dst[shiftOffset] = (byte) (scalar & 0x7F); 179 } else if (c0 < 0xC0) { 180 /* Continuation / 10AAAAAA. */ 181 } else if (c0 < 0xE0) { 182 /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */ 183 if (len >= 2) { 184 byte c1 = dst[shiftOffset + 1]; 185 scalar += (c1 & 0x3F) | ((c0 & 0x1F) << 6); 186 dst[shiftOffset] = (byte) (0xC0 | ((scalar >> 6) & 0x1F)); 187 dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | (scalar & 0x3F)); 188 step = 2; 189 } else { 190 step = len; 191 } 192 } else if (c0 < 0xF0) { 193 /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */ 194 if (len >= 3) { 195 byte c1 = dst[shiftOffset + 1]; 196 byte c2 = dst[shiftOffset + 2]; 197 scalar += (c2 & 0x3F) | ((c1 & 0x3F) << 6) | ((c0 & 0x0F) << 12); 198 dst[shiftOffset] = (byte) (0xE0 | ((scalar >> 12) & 0x0F)); 199 dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | ((scalar >> 6) & 0x3F)); 200 dst[shiftOffset + 2] = (byte) ((c2 & 0xC0) | (scalar & 0x3F)); 201 step = 3; 202 } else { 203 step = len; 204 } 205 } else if (c0 < 0xF8) { 206 /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */ 207 if (len >= 4) { 208 byte c1 = dst[shiftOffset + 1]; 209 byte c2 = dst[shiftOffset + 2]; 210 byte c3 = dst[shiftOffset + 3]; 211 scalar += (c3 & 0x3F) | ((c2 & 0x3F) << 6) | ((c1 & 0x3F) << 12) | ((c0 & 0x07) << 18); 212 dst[shiftOffset] = (byte) (0xF0 | ((scalar >> 18) & 0x07)); 213 dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | ((scalar >> 12) & 0x3F)); 214 dst[shiftOffset + 2] = (byte) ((c2 & 0xC0) | ((scalar >> 6) & 0x3F)); 215 dst[shiftOffset + 3] = (byte) ((c3 & 0xC0) | (scalar & 0x3F)); 216 step = 4; 217 } else { 218 step = len; 219 } 220 } 221 shiftOffset += step; 222 len -= step; 223 if (transformType == SHIFT_FIRST) { 224 len = 0; 225 } 226 } 227 } 228 229 // Copy suffix. 230 while (suffix != suffixEnd) { 231 dst[offset++] = prefixSuffixStorage[suffix++]; 232 } 233 234 return offset - dstOffset; 235 } 236 } 237