• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 Square, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.squareup.okhttp;
17 
18 import java.net.URI;
19 import java.net.URL;
20 import java.util.Collections;
21 import java.util.LinkedHashMap;
22 import java.util.Map;
23 import okio.Buffer;
24 import okio.ByteString;
25 
26 import static org.junit.Assert.fail;
27 
28 /** Tests how each code point is encoded and decoded in the context of each URL component. */
29 class UrlComponentEncodingTester {
30   private static final int UNICODE_2 = 0x07ff; // Arbitrary code point that's 2 bytes in UTF-8.
31   private static final int UNICODE_3 = 0xffff; // Arbitrary code point that's 3 bytes in UTF-8.
32   private static final int UNICODE_4 = 0x10ffff; // Arbitrary code point that's 4 bytes in UTF-8.
33 
34   /**
35    * The default encode set for the ASCII range. The specific rules vary per-component: for example,
36    * '?' may be identity-encoded in a fragment, but must be percent-encoded in a path.
37    *
38    * See https://url.spec.whatwg.org/#percent-encoded-bytes
39    */
40   private static final Map<Integer, Encoding> defaultEncodings;
41   static {
42     Map<Integer, Encoding> map = new LinkedHashMap<>();
43     map.put(       0x0, Encoding.PERCENT); // Null character
44     map.put(       0x1, Encoding.PERCENT); // Start of Header
45     map.put(       0x2, Encoding.PERCENT); // Start of Text
46     map.put(       0x3, Encoding.PERCENT); // End of Text
47     map.put(       0x4, Encoding.PERCENT); // End of Transmission
48     map.put(       0x5, Encoding.PERCENT); // Enquiry
49     map.put(       0x6, Encoding.PERCENT); // Acknowledgment
50     map.put(       0x7, Encoding.PERCENT); // Bell
map.put(int)51     map.put((int) '\b', Encoding.PERCENT); // Backspace
map.put(int)52     map.put((int) '\t', Encoding.SKIP);    // Horizontal Tab
map.put(int)53     map.put((int) '\n', Encoding.SKIP);    // Line feed
54     map.put(       0xb, Encoding.PERCENT); // Vertical Tab
map.put(int)55     map.put((int) '\f', Encoding.SKIP);    // Form feed
map.put(int)56     map.put((int) '\r', Encoding.SKIP);    // Carriage return
57     map.put(       0xe, Encoding.PERCENT); // Shift Out
58     map.put(       0xf, Encoding.PERCENT); // Shift In
59     map.put(      0x10, Encoding.PERCENT); // Data Link Escape
60     map.put(      0x11, Encoding.PERCENT); // Device Control 1 (oft. XON)
61     map.put(      0x12, Encoding.PERCENT); // Device Control 2
62     map.put(      0x13, Encoding.PERCENT); // Device Control 3 (oft. XOFF)
63     map.put(      0x14, Encoding.PERCENT); // Device Control 4
64     map.put(      0x15, Encoding.PERCENT); // Negative Acknowledgment
65     map.put(      0x16, Encoding.PERCENT); // Synchronous idle
66     map.put(      0x17, Encoding.PERCENT); // End of Transmission Block
67     map.put(      0x18, Encoding.PERCENT); // Cancel
68     map.put(      0x19, Encoding.PERCENT); // End of Medium
69     map.put(      0x1a, Encoding.PERCENT); // Substitute
70     map.put(      0x1b, Encoding.PERCENT); // Escape
71     map.put(      0x1c, Encoding.PERCENT); // File Separator
72     map.put(      0x1d, Encoding.PERCENT); // Group Separator
73     map.put(      0x1e, Encoding.PERCENT); // Record Separator
74     map.put(      0x1f, Encoding.PERCENT); // Unit Separator
map.put(int)75     map.put((int)  ' ', Encoding.PERCENT);
map.put(int)76     map.put((int)  '!', Encoding.IDENTITY);
map.put(int)77     map.put((int)  '"', Encoding.PERCENT);
map.put(int)78     map.put((int)  '#', Encoding.PERCENT);
map.put(int)79     map.put((int)  '$', Encoding.IDENTITY);
map.put(int)80     map.put((int)  '%', Encoding.IDENTITY);
map.put(int)81     map.put((int)  '&', Encoding.IDENTITY);
map.put(int)82     map.put((int) '\'', Encoding.IDENTITY);
map.put(int)83     map.put((int)  '(', Encoding.IDENTITY);
map.put(int)84     map.put((int)  ')', Encoding.IDENTITY);
map.put(int)85     map.put((int)  '*', Encoding.IDENTITY);
map.put(int)86     map.put((int)  '+', Encoding.IDENTITY);
map.put(int)87     map.put((int)  ',', Encoding.IDENTITY);
map.put(int)88     map.put((int)  '-', Encoding.IDENTITY);
map.put(int)89     map.put((int)  '.', Encoding.IDENTITY);
map.put(int)90     map.put((int)  '/', Encoding.IDENTITY);
map.put(int)91     map.put((int)  '0', Encoding.IDENTITY);
map.put(int)92     map.put((int)  '1', Encoding.IDENTITY);
map.put(int)93     map.put((int)  '2', Encoding.IDENTITY);
map.put(int)94     map.put((int)  '3', Encoding.IDENTITY);
map.put(int)95     map.put((int)  '4', Encoding.IDENTITY);
map.put(int)96     map.put((int)  '5', Encoding.IDENTITY);
map.put(int)97     map.put((int)  '6', Encoding.IDENTITY);
map.put(int)98     map.put((int)  '7', Encoding.IDENTITY);
map.put(int)99     map.put((int)  '8', Encoding.IDENTITY);
map.put(int)100     map.put((int)  '9', Encoding.IDENTITY);
map.put(int)101     map.put((int)  ':', Encoding.IDENTITY);
map.put(int)102     map.put((int)  ';', Encoding.IDENTITY);
map.put(int)103     map.put((int)  '<', Encoding.PERCENT);
map.put(int)104     map.put((int)  '=', Encoding.IDENTITY);
map.put(int)105     map.put((int)  '>', Encoding.PERCENT);
map.put(int)106     map.put((int)  '?', Encoding.PERCENT);
map.put(int)107     map.put((int)  '@', Encoding.IDENTITY);
map.put(int)108     map.put((int)  'A', Encoding.IDENTITY);
map.put(int)109     map.put((int)  'B', Encoding.IDENTITY);
map.put(int)110     map.put((int)  'C', Encoding.IDENTITY);
map.put(int)111     map.put((int)  'D', Encoding.IDENTITY);
map.put(int)112     map.put((int)  'E', Encoding.IDENTITY);
map.put(int)113     map.put((int)  'F', Encoding.IDENTITY);
map.put(int)114     map.put((int)  'G', Encoding.IDENTITY);
map.put(int)115     map.put((int)  'H', Encoding.IDENTITY);
map.put(int)116     map.put((int)  'I', Encoding.IDENTITY);
map.put(int)117     map.put((int)  'J', Encoding.IDENTITY);
map.put(int)118     map.put((int)  'K', Encoding.IDENTITY);
map.put(int)119     map.put((int)  'L', Encoding.IDENTITY);
map.put(int)120     map.put((int)  'M', Encoding.IDENTITY);
map.put(int)121     map.put((int)  'N', Encoding.IDENTITY);
map.put(int)122     map.put((int)  'O', Encoding.IDENTITY);
map.put(int)123     map.put((int)  'P', Encoding.IDENTITY);
map.put(int)124     map.put((int)  'Q', Encoding.IDENTITY);
map.put(int)125     map.put((int)  'R', Encoding.IDENTITY);
map.put(int)126     map.put((int)  'S', Encoding.IDENTITY);
map.put(int)127     map.put((int)  'T', Encoding.IDENTITY);
map.put(int)128     map.put((int)  'U', Encoding.IDENTITY);
map.put(int)129     map.put((int)  'V', Encoding.IDENTITY);
map.put(int)130     map.put((int)  'W', Encoding.IDENTITY);
map.put(int)131     map.put((int)  'X', Encoding.IDENTITY);
map.put(int)132     map.put((int)  'Y', Encoding.IDENTITY);
map.put(int)133     map.put((int)  'Z', Encoding.IDENTITY);
map.put(int)134     map.put((int)  '[', Encoding.IDENTITY);
map.put(int)135     map.put((int) '\\', Encoding.IDENTITY);
map.put(int)136     map.put((int)  ']', Encoding.IDENTITY);
map.put(int)137     map.put((int)  '^', Encoding.IDENTITY);
map.put(int)138     map.put((int)  '_', Encoding.IDENTITY);
map.put(int)139     map.put((int)  '`', Encoding.PERCENT);
map.put(int)140     map.put((int)  'a', Encoding.IDENTITY);
map.put(int)141     map.put((int)  'b', Encoding.IDENTITY);
map.put(int)142     map.put((int)  'c', Encoding.IDENTITY);
map.put(int)143     map.put((int)  'd', Encoding.IDENTITY);
map.put(int)144     map.put((int)  'e', Encoding.IDENTITY);
map.put(int)145     map.put((int)  'f', Encoding.IDENTITY);
map.put(int)146     map.put((int)  'g', Encoding.IDENTITY);
map.put(int)147     map.put((int)  'h', Encoding.IDENTITY);
map.put(int)148     map.put((int)  'i', Encoding.IDENTITY);
map.put(int)149     map.put((int)  'j', Encoding.IDENTITY);
map.put(int)150     map.put((int)  'k', Encoding.IDENTITY);
map.put(int)151     map.put((int)  'l', Encoding.IDENTITY);
map.put(int)152     map.put((int)  'm', Encoding.IDENTITY);
map.put(int)153     map.put((int)  'n', Encoding.IDENTITY);
map.put(int)154     map.put((int)  'o', Encoding.IDENTITY);
map.put(int)155     map.put((int)  'p', Encoding.IDENTITY);
map.put(int)156     map.put((int)  'q', Encoding.IDENTITY);
map.put(int)157     map.put((int)  'r', Encoding.IDENTITY);
map.put(int)158     map.put((int)  's', Encoding.IDENTITY);
map.put(int)159     map.put((int)  't', Encoding.IDENTITY);
map.put(int)160     map.put((int)  'u', Encoding.IDENTITY);
map.put(int)161     map.put((int)  'v', Encoding.IDENTITY);
map.put(int)162     map.put((int)  'w', Encoding.IDENTITY);
map.put(int)163     map.put((int)  'x', Encoding.IDENTITY);
map.put(int)164     map.put((int)  'y', Encoding.IDENTITY);
map.put(int)165     map.put((int)  'z', Encoding.IDENTITY);
map.put(int)166     map.put((int)  '{', Encoding.IDENTITY);
map.put(int)167     map.put((int)  '|', Encoding.IDENTITY);
map.put(int)168     map.put((int)  '}', Encoding.IDENTITY);
map.put(int)169     map.put((int)  '~', Encoding.IDENTITY);
170     map.put(      0x7f, Encoding.PERCENT); // Delete
map.put( UNICODE_2, Encoding.PERCENT)171     map.put( UNICODE_2, Encoding.PERCENT);
map.put( UNICODE_3, Encoding.PERCENT)172     map.put( UNICODE_3, Encoding.PERCENT);
map.put( UNICODE_4, Encoding.PERCENT)173     map.put( UNICODE_4, Encoding.PERCENT);
174     defaultEncodings = Collections.unmodifiableMap(map);
175   }
176 
177   private final Map<Integer, Encoding> encodings;
178   private final StringBuilder uriEscapedCodePoints = new StringBuilder();
179 
UrlComponentEncodingTester()180   public UrlComponentEncodingTester() {
181     this.encodings = new LinkedHashMap<>(defaultEncodings);
182   }
183 
override(Encoding encoding, int... codePoints)184   public UrlComponentEncodingTester override(Encoding encoding, int... codePoints) {
185     for (int codePoint : codePoints) {
186       encodings.put(codePoint, encoding);
187     }
188     return this;
189   }
190 
identityForNonAscii()191   public UrlComponentEncodingTester identityForNonAscii() {
192     encodings.put(UNICODE_2, Encoding.IDENTITY);
193     encodings.put(UNICODE_3, Encoding.IDENTITY);
194     encodings.put(UNICODE_4, Encoding.IDENTITY);
195     return this;
196   }
197 
198   /**
199    * Configure a character to be skipped but only for conversion to and from {@code java.net.URI}.
200    * That class is more strict than the others.
201    */
skipForUri(int... codePoints)202   public UrlComponentEncodingTester skipForUri(int... codePoints) {
203     uriEscapedCodePoints.append(new String(codePoints, 0, codePoints.length));
204     return this;
205   }
206 
test(Component component)207   public UrlComponentEncodingTester test(Component component) {
208     for (Map.Entry<Integer, Encoding> entry : encodings.entrySet()) {
209       Encoding encoding = entry.getValue();
210       int codePoint = entry.getKey();
211       testEncodeAndDecode(codePoint, component);
212       if (encoding == Encoding.SKIP) continue;
213 
214       testParseOriginal(codePoint, encoding, component);
215       testParseAlreadyEncoded(codePoint, encoding, component);
216       testToUrl(codePoint, encoding, component);
217       testFromUrl(codePoint, encoding, component);
218 
219       if (codePoint != '%') {
220         boolean uriEscaped = uriEscapedCodePoints.indexOf(
221             Encoding.IDENTITY.encode(codePoint)) != -1;
222         testUri(codePoint, encoding, component, uriEscaped);
223       }
224     }
225     return this;
226   }
227 
testParseAlreadyEncoded(int codePoint, Encoding encoding, Component component)228   private void testParseAlreadyEncoded(int codePoint, Encoding encoding, Component component) {
229     String encoded = encoding.encode(codePoint);
230     String urlString = component.urlString(encoded);
231     HttpUrl url = HttpUrl.parse(urlString);
232     if (!component.encodedValue(url).equals(encoded)) {
233       fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
234     }
235   }
236 
testEncodeAndDecode(int codePoint, Component component)237   private void testEncodeAndDecode(int codePoint, Component component) {
238     String expected = Encoding.IDENTITY.encode(codePoint);
239     HttpUrl.Builder builder = HttpUrl.parse("http://host/").newBuilder();
240     component.set(builder, expected);
241     HttpUrl url = builder.build();
242     String actual = component.get(url);
243     if (!expected.equals(actual)) {
244       fail(String.format("Roundtrip %s %#x %s", component, codePoint, url));
245     }
246   }
247 
testParseOriginal(int codePoint, Encoding encoding, Component component)248   private void testParseOriginal(int codePoint, Encoding encoding, Component component) {
249     String encoded = encoding.encode(codePoint);
250     if (encoding != Encoding.PERCENT) return;
251     String identity = Encoding.IDENTITY.encode(codePoint);
252     String urlString = component.urlString(identity);
253     HttpUrl url = HttpUrl.parse(urlString);
254 
255     String s = component.encodedValue(url);
256     if (!s.equals(encoded)) {
257       fail(String.format("Encoding %s %#02x using %s", component, codePoint, encoding));
258     }
259   }
260 
testToUrl(int codePoint, Encoding encoding, Component component)261   private void testToUrl(int codePoint, Encoding encoding, Component component) {
262     String encoded = encoding.encode(codePoint);
263     HttpUrl httpUrl = HttpUrl.parse(component.urlString(encoded));
264     URL javaNetUrl = httpUrl.url();
265     if (!javaNetUrl.toString().equals(javaNetUrl.toString())) {
266       fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
267     }
268   }
269 
testFromUrl(int codePoint, Encoding encoding, Component component)270   private void testFromUrl(int codePoint, Encoding encoding, Component component) {
271     String encoded = encoding.encode(codePoint);
272     HttpUrl httpUrl = HttpUrl.parse(component.urlString(encoded));
273     HttpUrl toAndFromJavaNetUrl = HttpUrl.get(httpUrl.url());
274     if (!toAndFromJavaNetUrl.equals(httpUrl)) {
275       fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
276     }
277   }
278 
testUri( int codePoint, Encoding encoding, Component component, boolean uriEscaped)279   private void testUri(
280       int codePoint, Encoding encoding, Component component, boolean uriEscaped) {
281     String string = new String(new int[] { codePoint }, 0, 1);
282     String encoded = encoding.encode(codePoint);
283     HttpUrl httpUrl = HttpUrl.parse(component.urlString(encoded));
284     URI uri = httpUrl.uri();
285     HttpUrl toAndFromUri = HttpUrl.get(uri);
286     if (uriEscaped) {
287       // The URI has more escaping than the HttpURL. Check that the decoded values still match.
288       if (uri.toString().equals(httpUrl.toString())) {
289         fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
290       }
291       if (!component.get(toAndFromUri).equals(string)) {
292         fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
293       }
294     } else {
295       // Check that the URI and HttpURL have the exact same escaping.
296       if (!toAndFromUri.equals(httpUrl)) {
297         fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
298       }
299       if (!uri.toString().equals(httpUrl.toString())) {
300         fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding));
301       }
302     }
303   }
304 
305   public enum Encoding {
306     IDENTITY {
encode(int codePoint)307       public String encode(int codePoint) {
308         return new String(new int[] { codePoint }, 0, 1);
309       }
310     },
311 
312     PERCENT {
encode(int codePoint)313       public String encode(int codePoint) {
314         ByteString utf8 = ByteString.encodeUtf8(IDENTITY.encode(codePoint));
315         Buffer percentEncoded = new Buffer();
316         for (int i = 0; i < utf8.size(); i++) {
317           percentEncoded.writeUtf8(String.format("%%%02X", utf8.getByte(i) & 0xff));
318         }
319         return percentEncoded.readUtf8();
320       }
321     },
322 
323     SKIP;
324 
encode(int codePoint)325     public String encode(int codePoint) {
326       throw new UnsupportedOperationException();
327     }
328   }
329 
330   public enum Component {
331     USER {
urlString(String value)332       @Override public String urlString(String value) {
333         return "http://" + value + "@example.com/";
334       }
encodedValue(HttpUrl url)335       @Override public String encodedValue(HttpUrl url) {
336         return url.encodedUsername();
337       }
set(HttpUrl.Builder builder, String value)338       @Override public void set(HttpUrl.Builder builder, String value) {
339         builder.username(value);
340       }
get(HttpUrl url)341       @Override public String get(HttpUrl url) {
342         return url.username();
343       }
344     },
345     PASSWORD {
urlString(String value)346       @Override public String urlString(String value) {
347         return "http://:" + value + "@example.com/";
348       }
encodedValue(HttpUrl url)349       @Override public String encodedValue(HttpUrl url) {
350         return url.encodedPassword();
351       }
set(HttpUrl.Builder builder, String value)352       @Override public void set(HttpUrl.Builder builder, String value) {
353         builder.password(value);
354       }
get(HttpUrl url)355       @Override public String get(HttpUrl url) {
356         return url.password();
357       }
358     },
359     PATH {
urlString(String value)360       @Override public String urlString(String value) {
361         return "http://example.com/a" + value + "z/";
362       }
encodedValue(HttpUrl url)363       @Override public String encodedValue(HttpUrl url) {
364         String path = url.encodedPath();
365         return path.substring(2, path.length() - 2);
366       }
set(HttpUrl.Builder builder, String value)367       @Override public void set(HttpUrl.Builder builder, String value) {
368         builder.addPathSegment("a" + value + "z");
369       }
get(HttpUrl url)370       @Override public String get(HttpUrl url) {
371         String pathSegment = url.pathSegments().get(0);
372         return pathSegment.substring(1, pathSegment.length() - 1);
373       }
374     },
375     QUERY {
urlString(String value)376       @Override public String urlString(String value) {
377         return "http://example.com/?a" + value + "z";
378       }
encodedValue(HttpUrl url)379       @Override public String encodedValue(HttpUrl url) {
380         String query = url.encodedQuery();
381         return query.substring(1, query.length() - 1);
382       }
set(HttpUrl.Builder builder, String value)383       @Override public void set(HttpUrl.Builder builder, String value) {
384         builder.query("a" + value + "z");
385       }
get(HttpUrl url)386       @Override public String get(HttpUrl url) {
387         String query = url.query();
388         return query.substring(1, query.length() - 1);
389       }
390     },
391     FRAGMENT {
urlString(String value)392       @Override public String urlString(String value) {
393         return "http://example.com/#a" + value + "z";
394       }
encodedValue(HttpUrl url)395       @Override public String encodedValue(HttpUrl url) {
396         String fragment = url.encodedFragment();
397         return fragment.substring(1, fragment.length() - 1);
398       }
set(HttpUrl.Builder builder, String value)399       @Override public void set(HttpUrl.Builder builder, String value) {
400         builder.fragment("a" + value + "z");
401       }
get(HttpUrl url)402       @Override public String get(HttpUrl url) {
403         String fragment = url.fragment();
404         return fragment.substring(1, fragment.length() - 1);
405       }
406     };
407 
urlString(String value)408     public abstract String urlString(String value);
409 
encodedValue(HttpUrl url)410     public abstract String encodedValue(HttpUrl url);
411 
set(HttpUrl.Builder builder, String value)412     public abstract void set(HttpUrl.Builder builder, String value);
413 
get(HttpUrl url)414     public abstract String get(HttpUrl url);
415   }
416 }
417