1 /* 2 * Copyright (C) 2009 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.escape; 18 19 import com.google.common.annotations.GwtCompatible; 20 import com.google.common.collect.ImmutableMap; 21 import com.google.common.escape.testing.EscaperAsserts; 22 import java.io.IOException; 23 import junit.framework.TestCase; 24 25 /** 26 * @author David Beaumont 27 */ 28 @GwtCompatible 29 @ElementTypesAreNonnullByDefault 30 public class ArrayBasedUnicodeEscaperTest extends TestCase { 31 private static final ImmutableMap<Character, String> NO_REPLACEMENTS = ImmutableMap.of(); 32 private static final ImmutableMap<Character, String> SIMPLE_REPLACEMENTS = 33 ImmutableMap.of( 34 '\n', "<newline>", 35 '\t', "<tab>", 36 '&', "<and>"); 37 private static final char[] NO_CHARS = new char[0]; 38 testReplacements()39 public void testReplacements() throws IOException { 40 // In reality this is not a very sensible escaper to have (if you are only 41 // escaping elements from a map you would use a ArrayBasedCharEscaper). 42 UnicodeEscaper escaper = 43 new ArrayBasedUnicodeEscaper( 44 SIMPLE_REPLACEMENTS, Character.MIN_VALUE, Character.MAX_CODE_POINT, null) { 45 @Override 46 protected char[] escapeUnsafe(int c) { 47 return NO_CHARS; 48 } 49 }; 50 EscaperAsserts.assertBasic(escaper); 51 assertEquals("<tab>Fish <and> Chips<newline>", escaper.escape("\tFish & Chips\n")); 52 53 // Verify that everything else is left unescaped. 54 String safeChars = "\0\u0100\uD800\uDC00\uFFFF"; 55 assertEquals(safeChars, escaper.escape(safeChars)); 56 57 // Ensure that Unicode escapers behave correctly wrt badly formed input. 58 String badUnicode = "\uDC00\uD800"; 59 try { 60 escaper.escape(badUnicode); 61 fail("should fail for bad Unicode"); 62 } catch (IllegalArgumentException e) { 63 // Pass 64 } 65 } 66 testSafeRange()67 public void testSafeRange() throws IOException { 68 // Basic escaping of unsafe chars (wrap them in {,}'s) 69 UnicodeEscaper wrappingEscaper = 70 new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) { 71 @Override 72 protected char[] escapeUnsafe(int c) { 73 return ("{" + (char) c + "}").toCharArray(); 74 } 75 }; 76 EscaperAsserts.assertBasic(wrappingEscaper); 77 // '[' and '@' lie either side of [A-Z]. 78 assertEquals("{[}FOO{@}BAR{]}", wrappingEscaper.escape("[FOO@BAR]")); 79 } 80 testDeleteUnsafeChars()81 public void testDeleteUnsafeChars() throws IOException { 82 UnicodeEscaper deletingEscaper = 83 new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) { 84 @Override 85 protected char[] escapeUnsafe(int c) { 86 return NO_CHARS; 87 } 88 }; 89 EscaperAsserts.assertBasic(deletingEscaper); 90 assertEquals( 91 "Everything outside the printable ASCII range is deleted.", 92 deletingEscaper.escape( 93 "\tEverything\0 outside the\uD800\uDC00 " 94 + "printable ASCII \uFFFFrange is \u007Fdeleted.\n")); 95 } 96 testReplacementPriority()97 public void testReplacementPriority() throws IOException { 98 UnicodeEscaper replacingEscaper = 99 new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) { 100 private final char[] unknown = new char[] {'?'}; 101 102 @Override 103 protected char[] escapeUnsafe(int c) { 104 return unknown; 105 } 106 }; 107 EscaperAsserts.assertBasic(replacingEscaper); 108 109 // Replacements are applied first regardless of whether the character is in 110 // the safe range or not ('&' is a safe char while '\t' and '\n' are not). 111 assertEquals( 112 "<tab>Fish <and>? Chips?<newline>", replacingEscaper.escape("\tFish &\0 Chips\r\n")); 113 } 114 testCodePointsFromSurrogatePairs()115 public void testCodePointsFromSurrogatePairs() throws IOException { 116 UnicodeEscaper surrogateEscaper = 117 new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) { 118 private final char[] escaped = new char[] {'X'}; 119 120 @Override 121 protected char[] escapeUnsafe(int c) { 122 return escaped; 123 } 124 }; 125 EscaperAsserts.assertBasic(surrogateEscaper); 126 127 // A surrogate pair defining a code point within the safe range. 128 String safeInput = "\uD800\uDC00"; // 0x10000 129 assertEquals(safeInput, surrogateEscaper.escape(safeInput)); 130 131 // A surrogate pair defining a code point outside the safe range (but both 132 // of the surrogate characters lie within the safe range). It is important 133 // not to accidentally treat this as a sequence of safe characters. 134 String unsafeInput = "\uDBFF\uDFFF"; // 0x10FFFF 135 assertEquals("X", surrogateEscaper.escape(unsafeInput)); 136 } 137 } 138