• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.common.escape;
18 
19 import com.google.common.annotations.GwtCompatible;
20 import com.google.common.collect.ImmutableMap;
21 import com.google.common.escape.testing.EscaperAsserts;
22 import java.io.IOException;
23 import junit.framework.TestCase;
24 
25 /** @author David Beaumont */
26 @GwtCompatible
27 public class ArrayBasedUnicodeEscaperTest extends TestCase {
28   private static final ImmutableMap<Character, String> NO_REPLACEMENTS = ImmutableMap.of();
29   private static final ImmutableMap<Character, String> SIMPLE_REPLACEMENTS =
30       ImmutableMap.of(
31           '\n', "<newline>",
32           '\t', "<tab>",
33           '&', "<and>");
34   private static final char[] NO_CHARS = new char[0];
35 
testReplacements()36   public void testReplacements() throws IOException {
37     // In reality this is not a very sensible escaper to have (if you are only
38     // escaping elements from a map you would use a ArrayBasedCharEscaper).
39     UnicodeEscaper escaper =
40         new ArrayBasedUnicodeEscaper(
41             SIMPLE_REPLACEMENTS, Character.MIN_VALUE, Character.MAX_CODE_POINT, null) {
42           @Override
43           protected char[] escapeUnsafe(int c) {
44             return NO_CHARS;
45           }
46         };
47     EscaperAsserts.assertBasic(escaper);
48     assertEquals("<tab>Fish <and> Chips<newline>", escaper.escape("\tFish & Chips\n"));
49 
50     // Verify that everything else is left unescaped.
51     String safeChars = "\0\u0100\uD800\uDC00\uFFFF";
52     assertEquals(safeChars, escaper.escape(safeChars));
53 
54     // Ensure that Unicode escapers behave correctly wrt badly formed input.
55     String badUnicode = "\uDC00\uD800";
56     try {
57       escaper.escape(badUnicode);
58       fail("should fail for bad Unicode");
59     } catch (IllegalArgumentException e) {
60       // Pass
61     }
62   }
63 
testSafeRange()64   public void testSafeRange() throws IOException {
65     // Basic escaping of unsafe chars (wrap them in {,}'s)
66     UnicodeEscaper wrappingEscaper =
67         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) {
68           @Override
69           protected char[] escapeUnsafe(int c) {
70             return ("{" + (char) c + "}").toCharArray();
71           }
72         };
73     EscaperAsserts.assertBasic(wrappingEscaper);
74     // '[' and '@' lie either side of [A-Z].
75     assertEquals("{[}FOO{@}BAR{]}", wrappingEscaper.escape("[FOO@BAR]"));
76   }
77 
testDeleteUnsafeChars()78   public void testDeleteUnsafeChars() throws IOException {
79     UnicodeEscaper deletingEscaper =
80         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) {
81           @Override
82           protected char[] escapeUnsafe(int c) {
83             return NO_CHARS;
84           }
85         };
86     EscaperAsserts.assertBasic(deletingEscaper);
87     assertEquals(
88         "Everything outside the printable ASCII range is deleted.",
89         deletingEscaper.escape(
90             "\tEverything\0 outside the\uD800\uDC00 "
91                 + "printable ASCII \uFFFFrange is \u007Fdeleted.\n"));
92   }
93 
testReplacementPriority()94   public void testReplacementPriority() throws IOException {
95     UnicodeEscaper replacingEscaper =
96         new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) {
97           private final char[] unknown = new char[] {'?'};
98 
99           @Override
100           protected char[] escapeUnsafe(int c) {
101             return unknown;
102           }
103         };
104     EscaperAsserts.assertBasic(replacingEscaper);
105 
106     // Replacements are applied first regardless of whether the character is in
107     // the safe range or not ('&' is a safe char while '\t' and '\n' are not).
108     assertEquals(
109         "<tab>Fish <and>? Chips?<newline>", replacingEscaper.escape("\tFish &\0 Chips\r\n"));
110   }
111 
testCodePointsFromSurrogatePairs()112   public void testCodePointsFromSurrogatePairs() throws IOException {
113     UnicodeEscaper surrogateEscaper =
114         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) {
115           private final char[] escaped = new char[] {'X'};
116 
117           @Override
118           protected char[] escapeUnsafe(int c) {
119             return escaped;
120           }
121         };
122     EscaperAsserts.assertBasic(surrogateEscaper);
123 
124     // A surrogate pair defining a code point within the safe range.
125     String safeInput = "\uD800\uDC00"; // 0x10000
126     assertEquals(safeInput, surrogateEscaper.escape(safeInput));
127 
128     // A surrogate pair defining a code point outside the safe range (but both
129     // of the surrogate characters lie within the safe range). It is important
130     // not to accidentally treat this as a sequence of safe characters.
131     String unsafeInput = "\uDBFF\uDFFF"; // 0x10FFFF
132     assertEquals("X", surrogateEscaper.escape(unsafeInput));
133   }
134 }
135