• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.common.escape;
18 
19 import com.google.common.annotations.GwtCompatible;
20 import junit.framework.TestCase;
21 import org.checkerframework.checker.nullness.qual.Nullable;
22 
23 /**
24  * Tests for {@link UnicodeEscaper}.
25  *
26  * @author David Beaumont
27  */
28 @GwtCompatible
29 public class UnicodeEscaperTest extends TestCase {
30 
31   private static final String SMALLEST_SURROGATE =
32       "" + Character.MIN_HIGH_SURROGATE + Character.MIN_LOW_SURROGATE;
33   private static final String LARGEST_SURROGATE =
34       "" + Character.MAX_HIGH_SURROGATE + Character.MAX_LOW_SURROGATE;
35 
36   private static final String TEST_STRING =
37       "\0abyz\u0080\u0100\u0800\u1000ABYZ\uffff" + SMALLEST_SURROGATE + "0189" + LARGEST_SURROGATE;
38 
39   // Escapes nothing
40   private static final UnicodeEscaper NOP_ESCAPER =
41       new UnicodeEscaper() {
42         @Override
43         protected char @Nullable [] escape(int c) {
44           return null;
45         }
46       };
47 
48   // Escapes everything except [a-zA-Z0-9]
49   private static final UnicodeEscaper SIMPLE_ESCAPER =
50       new UnicodeEscaper() {
51         @Override
52         protected char @Nullable [] escape(int cp) {
53           return ('a' <= cp && cp <= 'z') || ('A' <= cp && cp <= 'Z') || ('0' <= cp && cp <= '9')
54               ? null
55               : ("[" + String.valueOf(cp) + "]").toCharArray();
56         }
57       };
58 
testNopEscaper()59   public void testNopEscaper() {
60     UnicodeEscaper e = NOP_ESCAPER;
61     assertEquals(TEST_STRING, escapeAsString(e, TEST_STRING));
62   }
63 
testSimpleEscaper()64   public void testSimpleEscaper() {
65     UnicodeEscaper e = SIMPLE_ESCAPER;
66     String expected =
67         "[0]abyz[128][256][2048][4096]ABYZ[65535]"
68             + "["
69             + Character.MIN_SUPPLEMENTARY_CODE_POINT
70             + "]"
71             + "0189["
72             + Character.MAX_CODE_POINT
73             + "]";
74     assertEquals(expected, escapeAsString(e, TEST_STRING));
75   }
76 
testGrowBuffer()77   public void testGrowBuffer() { // need to grow past an initial 1024 byte buffer
78     StringBuilder input = new StringBuilder();
79     StringBuilder expected = new StringBuilder();
80     for (int i = 256; i < 1024; i++) {
81       input.append((char) i);
82       expected.append("[" + i + "]");
83     }
84     assertEquals(expected.toString(), SIMPLE_ESCAPER.escape(input.toString()));
85   }
86 
testSurrogatePairs()87   public void testSurrogatePairs() {
88     UnicodeEscaper e = SIMPLE_ESCAPER;
89 
90     // Build up a range of surrogate pair characters to test
91     final int min = Character.MIN_SUPPLEMENTARY_CODE_POINT;
92     final int max = Character.MAX_CODE_POINT;
93     final int range = max - min;
94     final int s1 = min + (1 * range) / 4;
95     final int s2 = min + (2 * range) / 4;
96     final int s3 = min + (3 * range) / 4;
97     final char[] dst = new char[12];
98 
99     // Put surrogate pairs at odd indices so they can be split easily
100     dst[0] = 'x';
101     Character.toChars(min, dst, 1);
102     Character.toChars(s1, dst, 3);
103     Character.toChars(s2, dst, 5);
104     Character.toChars(s3, dst, 7);
105     Character.toChars(max, dst, 9);
106     dst[11] = 'x';
107     String test = new String(dst);
108 
109     // Get the expected result string
110     String expected = "x[" + min + "][" + s1 + "][" + s2 + "][" + s3 + "][" + max + "]x";
111     assertEquals(expected, escapeAsString(e, test));
112   }
113 
testTrailingHighSurrogate()114   public void testTrailingHighSurrogate() {
115     String test = "abc" + Character.MIN_HIGH_SURROGATE;
116     try {
117       escapeAsString(NOP_ESCAPER, test);
118       fail("Trailing high surrogate should cause exception");
119     } catch (IllegalArgumentException expected) {
120       // Pass
121     }
122     try {
123       escapeAsString(SIMPLE_ESCAPER, test);
124       fail("Trailing high surrogate should cause exception");
125     } catch (IllegalArgumentException expected) {
126       // Pass
127     }
128   }
129 
testNullInput()130   public void testNullInput() {
131     UnicodeEscaper e = SIMPLE_ESCAPER;
132     try {
133       e.escape((String) null);
134       fail("Null string should cause exception");
135     } catch (NullPointerException expected) {
136       // Pass
137     }
138   }
139 
testBadStrings()140   public void testBadStrings() {
141     UnicodeEscaper e = SIMPLE_ESCAPER;
142     String[] BAD_STRINGS = {
143       String.valueOf(Character.MIN_LOW_SURROGATE),
144       Character.MIN_LOW_SURROGATE + "xyz",
145       "abc" + Character.MIN_LOW_SURROGATE,
146       "abc" + Character.MIN_LOW_SURROGATE + "xyz",
147       String.valueOf(Character.MAX_LOW_SURROGATE),
148       Character.MAX_LOW_SURROGATE + "xyz",
149       "abc" + Character.MAX_LOW_SURROGATE,
150       "abc" + Character.MAX_LOW_SURROGATE + "xyz",
151     };
152     for (String s : BAD_STRINGS) {
153       try {
154         escapeAsString(e, s);
155         fail("Isolated low surrogate should cause exception [" + s + "]");
156       } catch (IllegalArgumentException expected) {
157         // Pass
158       }
159     }
160   }
161 
testFalsePositivesForNextEscapedIndex()162   public void testFalsePositivesForNextEscapedIndex() {
163     UnicodeEscaper e =
164         new UnicodeEscaper() {
165           // Canonical escaper method that only escapes lower case ASCII letters.
166           @Override
167           protected char @Nullable [] escape(int cp) {
168             return ('a' <= cp && cp <= 'z') ? new char[] {Character.toUpperCase((char) cp)} : null;
169           }
170           // Inefficient implementation that defines all letters as escapable.
171           @Override
172           protected int nextEscapeIndex(CharSequence csq, int index, int end) {
173             while (index < end && !Character.isLetter(csq.charAt(index))) {
174               index++;
175             }
176             return index;
177           }
178         };
179     assertEquals("\0HELLO \uD800\uDC00 WORLD!\n", e.escape("\0HeLLo \uD800\uDC00 WorlD!\n"));
180   }
181 
testCodePointAt_indexOutOfBoundsException()182   public void testCodePointAt_indexOutOfBoundsException() {
183     try {
184       UnicodeEscaper.codePointAt("Testing...", 4, 2);
185       fail();
186     } catch (IndexOutOfBoundsException expected) {
187     }
188   }
189 
escapeAsString(Escaper e, String s)190   private static String escapeAsString(Escaper e, String s) {
191     return e.escape(s);
192   }
193 }
194