• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 package org.brotli.dec;
8 
9 import java.nio.ByteBuffer;
10 
11 /**
12  * Transformations on dictionary words.
13  *
14  * Transform descriptor is a triplet: {prefix, operator, suffix}.
15  * "prefix" and "suffix" are short strings inserted before and after transformed dictionary word.
16  * "operator" is applied to dictionary word itself.
17  *
18  * Some operators has "built-in" parameters, i.e. parameter is defined by operator ordinal. Other
19  * operators have "external" parameters, supplied via additional table encoded in shared dictionary.
20  *
21  * Operators:
22  *  - IDENTITY (0): dictionary word is inserted "as is"
23  *  - OMIT_LAST_N (1 - 9): last N octets of dictionary word are not inserted; N == ordinal
24  *  - OMIT_FIRST_M (12-20): first M octets of dictionary word are not inserted; M == ordinal - 11
25  *  - UPPERCASE_FIRST (10): first "scalar" is XOR'ed with number 32
26  *  - UPPERCASE_ALL (11): all "scalars" are XOR'ed with number 32
27  *  - SHIFT_FIRST (21): first "scalar" is shifted by number form parameter table
28  *  - SHIFT_ALL (22): all "scalar" is shifted by number form parameter table
29  *
30  * Here "scalar" is a variable length character coding similar to UTF-8 encoding.
31  * UPPERCASE_XXX / SHIFT_XXX operators were designed to change the case of UTF-8 encoded characters.
32  * While UPPERCASE_XXX works well only on ASCII charset, SHIFT is much more generic and could be
33  * used for most (all?) alphabets.
34  */
35 final class Transform {
36 
37   static final class Transforms {
38     final int numTransforms;
39     final int[] triplets;
40     final byte[] prefixSuffixStorage;
41     final int[] prefixSuffixHeads;
42     final short[] params;
43 
Transforms(int numTransforms, int prefixSuffixLen, int prefixSuffixCount)44     Transforms(int numTransforms, int prefixSuffixLen, int prefixSuffixCount) {
45       this.numTransforms = numTransforms;
46       this.triplets = new int[numTransforms * 3];
47       this.params = new short[numTransforms];
48       this.prefixSuffixStorage = new byte[prefixSuffixLen];
49       this.prefixSuffixHeads = new int[prefixSuffixCount + 1];
50     }
51   }
52 
53   static final int NUM_RFC_TRANSFORMS = 121;
54   static final Transforms RFC_TRANSFORMS = new Transforms(NUM_RFC_TRANSFORMS, 167, 50);
55 
56   private static final int OMIT_FIRST_LAST_LIMIT = 9;
57 
58   private static final int IDENTITY = 0;
59   private static final int OMIT_LAST_BASE = IDENTITY + 1 - 1;  // there is no OMIT_LAST_0.
60   private static final int UPPERCASE_FIRST = OMIT_LAST_BASE + OMIT_FIRST_LAST_LIMIT + 1;
61   private static final int UPPERCASE_ALL = UPPERCASE_FIRST + 1;
62   private static final int OMIT_FIRST_BASE = UPPERCASE_ALL + 1 - 1;  // there is no OMIT_FIRST_0.
63   private static final int SHIFT_FIRST = OMIT_FIRST_BASE + OMIT_FIRST_LAST_LIMIT + 1;
64   private static final int SHIFT_ALL = SHIFT_FIRST + 1;
65 
66   // Bundle of 0-terminated strings.
67   private static final String PREFIX_SUFFIX_SRC = "# #s #, #e #.# the #.com/#\u00C2\u00A0# of # and"
68       + " # in # to #\"#\">#\n#]# for # a # that #. # with #'# from # by #. The # on # as # is #ing"
69       + " #\n\t#:#ed #(# at #ly #=\"# of the #. This #,# not #er #al #='#ful #ive #less #est #ize #"
70       + "ous #";
71   private static final String TRANSFORMS_SRC = "     !! ! ,  *!  &!  \" !  ) *   * -  ! # !  #!*!  "
72       + "+  ,$ !  -  %  .  / #   0  1 .  \"   2  3!*   4%  ! # /   5  6  7  8 0  1 &   $   9 +   : "
73       + " ;  < '  !=  >  ?! 4  @ 4  2  &   A *# (   B  C& ) %  ) !*# *-% A +! *.  D! %'  & E *6  F "
74       + " G% ! *A *%  H! D  I!+!  J!+   K +- *4! A  L!*4  M  N +6  O!*% +.! K *G  P +%(  ! G *D +D "
75       + " Q +# *K!*G!+D!+# +G +A +4!+% +K!+4!*D!+K!*K";
76 
unpackTransforms(byte[] prefixSuffix, int[] prefixSuffixHeads, int[] transforms, String prefixSuffixSrc, String transformsSrc)77   private static void unpackTransforms(byte[] prefixSuffix,
78       int[] prefixSuffixHeads, int[] transforms, String prefixSuffixSrc, String transformsSrc) {
79     int n = prefixSuffixSrc.length();
80     int index = 1;
81     int j = 0;
82     for (int i = 0; i < n; ++i) {
83       char c = prefixSuffixSrc.charAt(i);
84       if (c == 35) { // == #
85         prefixSuffixHeads[index++] = j;
86       } else {
87         prefixSuffix[j++] = (byte) c;
88       }
89     }
90 
91     for (int i = 0; i < NUM_RFC_TRANSFORMS * 3; ++i) {
92       transforms[i] = transformsSrc.charAt(i) - 32;
93     }
94   }
95 
96   static {
unpackTransforms(RFC_TRANSFORMS.prefixSuffixStorage, RFC_TRANSFORMS.prefixSuffixHeads, RFC_TRANSFORMS.triplets, PREFIX_SUFFIX_SRC, TRANSFORMS_SRC)97     unpackTransforms(RFC_TRANSFORMS.prefixSuffixStorage, RFC_TRANSFORMS.prefixSuffixHeads,
98         RFC_TRANSFORMS.triplets, PREFIX_SUFFIX_SRC, TRANSFORMS_SRC);
99   }
100 
transformDictionaryWord(byte[] dst, int dstOffset, ByteBuffer src, int srcOffset, int len, Transforms transforms, int transformIndex)101   static int transformDictionaryWord(byte[] dst, int dstOffset, ByteBuffer src, int srcOffset,
102       int len, Transforms transforms, int transformIndex) {
103     int offset = dstOffset;
104     int[] triplets = transforms.triplets;
105     byte[] prefixSuffixStorage = transforms.prefixSuffixStorage;
106     int[] prefixSuffixHeads = transforms.prefixSuffixHeads;
107     int transformOffset = 3 * transformIndex;
108     int prefixIdx = triplets[transformOffset];
109     int transformType = triplets[transformOffset + 1];
110     int suffixIdx = triplets[transformOffset + 2];
111     int prefix = prefixSuffixHeads[prefixIdx];
112     int prefixEnd = prefixSuffixHeads[prefixIdx + 1];
113     int suffix = prefixSuffixHeads[suffixIdx];
114     int suffixEnd = prefixSuffixHeads[suffixIdx + 1];
115 
116     int omitFirst = transformType - OMIT_FIRST_BASE;
117     int omitLast = transformType - OMIT_LAST_BASE;
118     if (omitFirst < 1 || omitFirst > OMIT_FIRST_LAST_LIMIT) {
119       omitFirst = 0;
120     }
121     if (omitLast < 1 || omitLast > OMIT_FIRST_LAST_LIMIT) {
122       omitLast = 0;
123     }
124 
125     // Copy prefix.
126     while (prefix != prefixEnd) {
127       dst[offset++] = prefixSuffixStorage[prefix++];
128     }
129 
130     // Copy trimmed word.
131     if (omitFirst > len) {
132       omitFirst = len;
133     }
134     srcOffset += omitFirst;
135     len -= omitFirst;
136     len -= omitLast;
137     int i = len;
138     while (i > 0) {
139       dst[offset++] = src.get(srcOffset++);
140       i--;
141     }
142 
143     // Ferment.
144     if (transformType == UPPERCASE_FIRST || transformType == UPPERCASE_ALL) {
145       int uppercaseOffset = offset - len;
146       if (transformType == UPPERCASE_FIRST) {
147         len = 1;
148       }
149       while (len > 0) {
150         int c0 = dst[uppercaseOffset] & 0xFF;
151         if (c0 < 0xC0) {
152           if (c0 >= 97 && c0 <= 122) { // in [a..z] range
153             dst[uppercaseOffset] ^= (byte) 32;
154           }
155           uppercaseOffset += 1;
156           len -= 1;
157         } else if (c0 < 0xE0) {
158           dst[uppercaseOffset + 1] ^= (byte) 32;
159           uppercaseOffset += 2;
160           len -= 2;
161         } else {
162           dst[uppercaseOffset + 2] ^= (byte) 5;
163           uppercaseOffset += 3;
164           len -= 3;
165         }
166       }
167     } else if (transformType == SHIFT_FIRST || transformType == SHIFT_ALL) {
168       int shiftOffset = offset - len;
169       short param = transforms.params[transformIndex];
170       /* Limited sign extension: scalar < (1 << 24). */
171       int scalar = (param & 0x7FFF) + (0x1000000 - (param & 0x8000));
172       while (len > 0) {
173         int step = 1;
174         int c0 = dst[shiftOffset] & 0xFF;
175         if (c0 < 0x80) {
176           /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
177           scalar += c0;
178           dst[shiftOffset] = (byte) (scalar & 0x7F);
179         } else if (c0 < 0xC0) {
180           /* Continuation / 10AAAAAA. */
181         } else if (c0 < 0xE0) {
182           /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
183           if (len >= 2) {
184             byte c1 = dst[shiftOffset + 1];
185             scalar += (c1 & 0x3F) | ((c0 & 0x1F) << 6);
186             dst[shiftOffset] = (byte) (0xC0 | ((scalar >> 6) & 0x1F));
187             dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | (scalar & 0x3F));
188             step = 2;
189           } else {
190             step = len;
191           }
192         } else if (c0 < 0xF0) {
193           /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
194           if (len >= 3) {
195             byte c1 = dst[shiftOffset + 1];
196             byte c2 = dst[shiftOffset + 2];
197             scalar += (c2 & 0x3F) | ((c1 & 0x3F) << 6) | ((c0 & 0x0F) << 12);
198             dst[shiftOffset] = (byte) (0xE0 | ((scalar >> 12) & 0x0F));
199             dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | ((scalar >> 6) & 0x3F));
200             dst[shiftOffset + 2] = (byte) ((c2 & 0xC0) | (scalar & 0x3F));
201             step = 3;
202           } else {
203             step = len;
204           }
205         } else if (c0 < 0xF8) {
206           /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
207           if (len >= 4) {
208             byte c1 = dst[shiftOffset + 1];
209             byte c2 = dst[shiftOffset + 2];
210             byte c3 = dst[shiftOffset + 3];
211             scalar += (c3 & 0x3F) | ((c2 & 0x3F) << 6) | ((c1 & 0x3F) << 12) | ((c0 & 0x07) << 18);
212             dst[shiftOffset] = (byte) (0xF0 | ((scalar >> 18) & 0x07));
213             dst[shiftOffset + 1] = (byte) ((c1 & 0xC0) | ((scalar >> 12) & 0x3F));
214             dst[shiftOffset + 2] = (byte) ((c2 & 0xC0) | ((scalar >> 6) & 0x3F));
215             dst[shiftOffset + 3] = (byte) ((c3 & 0xC0) | (scalar & 0x3F));
216             step = 4;
217           } else {
218             step = len;
219           }
220         }
221         shiftOffset += step;
222         len -= step;
223         if (transformType == SHIFT_FIRST) {
224           len = 0;
225         }
226       }
227     }
228 
229     // Copy suffix.
230     while (suffix != suffixEnd) {
231       dst[offset++] = prefixSuffixStorage[suffix++];
232     }
233 
234     return offset - dstOffset;
235   }
236 }
237