• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.common.escape;
18 
19 import com.google.common.annotations.GwtCompatible;
20 import com.google.common.collect.ImmutableMap;
21 import com.google.common.escape.testing.EscaperAsserts;
22 import java.io.IOException;
23 import junit.framework.TestCase;
24 
25 /**
26  * @author David Beaumont
27  */
28 @GwtCompatible
29 @ElementTypesAreNonnullByDefault
30 public class ArrayBasedUnicodeEscaperTest extends TestCase {
31   private static final ImmutableMap<Character, String> NO_REPLACEMENTS = ImmutableMap.of();
32   private static final ImmutableMap<Character, String> SIMPLE_REPLACEMENTS =
33       ImmutableMap.of(
34           '\n', "<newline>",
35           '\t', "<tab>",
36           '&', "<and>");
37   private static final char[] NO_CHARS = new char[0];
38 
testReplacements()39   public void testReplacements() throws IOException {
40     // In reality this is not a very sensible escaper to have (if you are only
41     // escaping elements from a map you would use a ArrayBasedCharEscaper).
42     UnicodeEscaper escaper =
43         new ArrayBasedUnicodeEscaper(
44             SIMPLE_REPLACEMENTS, Character.MIN_VALUE, Character.MAX_CODE_POINT, null) {
45           @Override
46           protected char[] escapeUnsafe(int c) {
47             return NO_CHARS;
48           }
49         };
50     EscaperAsserts.assertBasic(escaper);
51     assertEquals("<tab>Fish <and> Chips<newline>", escaper.escape("\tFish & Chips\n"));
52 
53     // Verify that everything else is left unescaped.
54     String safeChars = "\0\u0100\uD800\uDC00\uFFFF";
55     assertEquals(safeChars, escaper.escape(safeChars));
56 
57     // Ensure that Unicode escapers behave correctly wrt badly formed input.
58     String badUnicode = "\uDC00\uD800";
59     try {
60       escaper.escape(badUnicode);
61       fail("should fail for bad Unicode");
62     } catch (IllegalArgumentException e) {
63       // Pass
64     }
65   }
66 
testSafeRange()67   public void testSafeRange() throws IOException {
68     // Basic escaping of unsafe chars (wrap them in {,}'s)
69     UnicodeEscaper wrappingEscaper =
70         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) {
71           @Override
72           protected char[] escapeUnsafe(int c) {
73             return ("{" + (char) c + "}").toCharArray();
74           }
75         };
76     EscaperAsserts.assertBasic(wrappingEscaper);
77     // '[' and '@' lie either side of [A-Z].
78     assertEquals("{[}FOO{@}BAR{]}", wrappingEscaper.escape("[FOO@BAR]"));
79   }
80 
testDeleteUnsafeChars()81   public void testDeleteUnsafeChars() throws IOException {
82     UnicodeEscaper deletingEscaper =
83         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) {
84           @Override
85           protected char[] escapeUnsafe(int c) {
86             return NO_CHARS;
87           }
88         };
89     EscaperAsserts.assertBasic(deletingEscaper);
90     assertEquals(
91         "Everything outside the printable ASCII range is deleted.",
92         deletingEscaper.escape(
93             "\tEverything\0 outside the\uD800\uDC00 "
94                 + "printable ASCII \uFFFFrange is \u007Fdeleted.\n"));
95   }
96 
testReplacementPriority()97   public void testReplacementPriority() throws IOException {
98     UnicodeEscaper replacingEscaper =
99         new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) {
100           private final char[] unknown = new char[] {'?'};
101 
102           @Override
103           protected char[] escapeUnsafe(int c) {
104             return unknown;
105           }
106         };
107     EscaperAsserts.assertBasic(replacingEscaper);
108 
109     // Replacements are applied first regardless of whether the character is in
110     // the safe range or not ('&' is a safe char while '\t' and '\n' are not).
111     assertEquals(
112         "<tab>Fish <and>? Chips?<newline>", replacingEscaper.escape("\tFish &\0 Chips\r\n"));
113   }
114 
testCodePointsFromSurrogatePairs()115   public void testCodePointsFromSurrogatePairs() throws IOException {
116     UnicodeEscaper surrogateEscaper =
117         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) {
118           private final char[] escaped = new char[] {'X'};
119 
120           @Override
121           protected char[] escapeUnsafe(int c) {
122             return escaped;
123           }
124         };
125     EscaperAsserts.assertBasic(surrogateEscaper);
126 
127     // A surrogate pair defining a code point within the safe range.
128     String safeInput = "\uD800\uDC00"; // 0x10000
129     assertEquals(safeInput, surrogateEscaper.escape(safeInput));
130 
131     // A surrogate pair defining a code point outside the safe range (but both
132     // of the surrogate characters lie within the safe range). It is important
133     // not to accidentally treat this as a sequence of safe characters.
134     String unsafeInput = "\uDBFF\uDFFF"; // 0x10FFFF
135     assertEquals("X", surrogateEscaper.escape(unsafeInput));
136   }
137 }
138