• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.common.escape;
18 
19 import com.google.common.annotations.GwtCompatible;
20 import junit.framework.TestCase;
21 
22 /**
23  * Tests for {@link UnicodeEscaper}.
24  *
25  * @author David Beaumont
26  */
27 @GwtCompatible
28 public class UnicodeEscaperTest extends TestCase {
29 
30   private static final String SMALLEST_SURROGATE =
31       "" + Character.MIN_HIGH_SURROGATE + Character.MIN_LOW_SURROGATE;
32   private static final String LARGEST_SURROGATE =
33       "" + Character.MAX_HIGH_SURROGATE + Character.MAX_LOW_SURROGATE;
34 
35   private static final String TEST_STRING =
36       "\0abyz\u0080\u0100\u0800\u1000ABYZ\uffff" + SMALLEST_SURROGATE + "0189" + LARGEST_SURROGATE;
37 
38   // Escapes nothing
39   private static final UnicodeEscaper NOP_ESCAPER =
40       new UnicodeEscaper() {
41         @Override
42         protected char[] escape(int c) {
43           return null;
44         }
45       };
46 
47   // Escapes everything except [a-zA-Z0-9]
48   private static final UnicodeEscaper SIMPLE_ESCAPER =
49       new UnicodeEscaper() {
50         @Override
51         protected char[] escape(int cp) {
52           return ('a' <= cp && cp <= 'z') || ('A' <= cp && cp <= 'Z') || ('0' <= cp && cp <= '9')
53               ? null
54               : ("[" + String.valueOf(cp) + "]").toCharArray();
55         }
56       };
57 
testNopEscaper()58   public void testNopEscaper() {
59     UnicodeEscaper e = NOP_ESCAPER;
60     assertEquals(TEST_STRING, escapeAsString(e, TEST_STRING));
61   }
62 
testSimpleEscaper()63   public void testSimpleEscaper() {
64     UnicodeEscaper e = SIMPLE_ESCAPER;
65     String expected =
66         "[0]abyz[128][256][2048][4096]ABYZ[65535]"
67             + "["
68             + Character.MIN_SUPPLEMENTARY_CODE_POINT
69             + "]"
70             + "0189["
71             + Character.MAX_CODE_POINT
72             + "]";
73     assertEquals(expected, escapeAsString(e, TEST_STRING));
74   }
75 
testGrowBuffer()76   public void testGrowBuffer() { // need to grow past an initial 1024 byte buffer
77     StringBuilder input = new StringBuilder();
78     StringBuilder expected = new StringBuilder();
79     for (int i = 256; i < 1024; i++) {
80       input.append((char) i);
81       expected.append("[" + i + "]");
82     }
83     assertEquals(expected.toString(), SIMPLE_ESCAPER.escape(input.toString()));
84   }
85 
testSurrogatePairs()86   public void testSurrogatePairs() {
87     UnicodeEscaper e = SIMPLE_ESCAPER;
88 
89     // Build up a range of surrogate pair characters to test
90     final int min = Character.MIN_SUPPLEMENTARY_CODE_POINT;
91     final int max = Character.MAX_CODE_POINT;
92     final int range = max - min;
93     final int s1 = min + (1 * range) / 4;
94     final int s2 = min + (2 * range) / 4;
95     final int s3 = min + (3 * range) / 4;
96     final char[] dst = new char[12];
97 
98     // Put surrogate pairs at odd indices so they can be split easily
99     dst[0] = 'x';
100     Character.toChars(min, dst, 1);
101     Character.toChars(s1, dst, 3);
102     Character.toChars(s2, dst, 5);
103     Character.toChars(s3, dst, 7);
104     Character.toChars(max, dst, 9);
105     dst[11] = 'x';
106     String test = new String(dst);
107 
108     // Get the expected result string
109     String expected = "x[" + min + "][" + s1 + "][" + s2 + "][" + s3 + "][" + max + "]x";
110     assertEquals(expected, escapeAsString(e, test));
111   }
112 
testTrailingHighSurrogate()113   public void testTrailingHighSurrogate() {
114     String test = "abc" + Character.MIN_HIGH_SURROGATE;
115     try {
116       escapeAsString(NOP_ESCAPER, test);
117       fail("Trailing high surrogate should cause exception");
118     } catch (IllegalArgumentException expected) {
119       // Pass
120     }
121     try {
122       escapeAsString(SIMPLE_ESCAPER, test);
123       fail("Trailing high surrogate should cause exception");
124     } catch (IllegalArgumentException expected) {
125       // Pass
126     }
127   }
128 
testNullInput()129   public void testNullInput() {
130     UnicodeEscaper e = SIMPLE_ESCAPER;
131     try {
132       e.escape((String) null);
133       fail("Null string should cause exception");
134     } catch (NullPointerException expected) {
135       // Pass
136     }
137   }
138 
testBadStrings()139   public void testBadStrings() {
140     UnicodeEscaper e = SIMPLE_ESCAPER;
141     String[] BAD_STRINGS = {
142       String.valueOf(Character.MIN_LOW_SURROGATE),
143       Character.MIN_LOW_SURROGATE + "xyz",
144       "abc" + Character.MIN_LOW_SURROGATE,
145       "abc" + Character.MIN_LOW_SURROGATE + "xyz",
146       String.valueOf(Character.MAX_LOW_SURROGATE),
147       Character.MAX_LOW_SURROGATE + "xyz",
148       "abc" + Character.MAX_LOW_SURROGATE,
149       "abc" + Character.MAX_LOW_SURROGATE + "xyz",
150     };
151     for (String s : BAD_STRINGS) {
152       try {
153         escapeAsString(e, s);
154         fail("Isolated low surrogate should cause exception [" + s + "]");
155       } catch (IllegalArgumentException expected) {
156         // Pass
157       }
158     }
159   }
160 
testFalsePositivesForNextEscapedIndex()161   public void testFalsePositivesForNextEscapedIndex() {
162     UnicodeEscaper e =
163         new UnicodeEscaper() {
164           // Canonical escaper method that only escapes lower case ASCII letters.
165           @Override
166           protected char[] escape(int cp) {
167             return ('a' <= cp && cp <= 'z') ? new char[] {Character.toUpperCase((char) cp)} : null;
168           }
169           // Inefficient implementation that defines all letters as escapable.
170           @Override
171           protected int nextEscapeIndex(CharSequence csq, int index, int end) {
172             while (index < end && !Character.isLetter(csq.charAt(index))) {
173               index++;
174             }
175             return index;
176           }
177         };
178     assertEquals("\0HELLO \uD800\uDC00 WORLD!\n", e.escape("\0HeLLo \uD800\uDC00 WorlD!\n"));
179   }
180 
testCodePointAt_IndexOutOfBoundsException()181   public void testCodePointAt_IndexOutOfBoundsException() {
182     try {
183       UnicodeEscaper.codePointAt("Testing...", 4, 2);
184       fail();
185     } catch (IndexOutOfBoundsException expected) {
186     }
187   }
188 
escapeAsString(Escaper e, String s)189   private static String escapeAsString(Escaper e, String s) {
190     return e.escape(s);
191   }
192 }
193