1 /* 2 * Copyright (C) 2009 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.escape; 18 19 import com.google.common.annotations.GwtCompatible; 20 import com.google.common.collect.ImmutableMap; 21 import com.google.common.escape.testing.EscaperAsserts; 22 import java.io.IOException; 23 import junit.framework.TestCase; 24 25 /** @author David Beaumont */ 26 @GwtCompatible 27 public class ArrayBasedUnicodeEscaperTest extends TestCase { 28 private static final ImmutableMap<Character, String> NO_REPLACEMENTS = ImmutableMap.of(); 29 private static final ImmutableMap<Character, String> SIMPLE_REPLACEMENTS = 30 ImmutableMap.of( 31 '\n', "<newline>", 32 '\t', "<tab>", 33 '&', "<and>"); 34 private static final char[] NO_CHARS = new char[0]; 35 testReplacements()36 public void testReplacements() throws IOException { 37 // In reality this is not a very sensible escaper to have (if you are only 38 // escaping elements from a map you would use a ArrayBasedCharEscaper). 39 UnicodeEscaper escaper = 40 new ArrayBasedUnicodeEscaper( 41 SIMPLE_REPLACEMENTS, Character.MIN_VALUE, Character.MAX_CODE_POINT, null) { 42 @Override 43 protected char[] escapeUnsafe(int c) { 44 return NO_CHARS; 45 } 46 }; 47 EscaperAsserts.assertBasic(escaper); 48 assertEquals("<tab>Fish <and> Chips<newline>", escaper.escape("\tFish & Chips\n")); 49 50 // Verify that everything else is left unescaped. 51 String safeChars = "\0\u0100\uD800\uDC00\uFFFF"; 52 assertEquals(safeChars, escaper.escape(safeChars)); 53 54 // Ensure that Unicode escapers behave correctly wrt badly formed input. 55 String badUnicode = "\uDC00\uD800"; 56 try { 57 escaper.escape(badUnicode); 58 fail("should fail for bad Unicode"); 59 } catch (IllegalArgumentException e) { 60 // Pass 61 } 62 } 63 testSafeRange()64 public void testSafeRange() throws IOException { 65 // Basic escaping of unsafe chars (wrap them in {,}'s) 66 UnicodeEscaper wrappingEscaper = 67 new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) { 68 @Override 69 protected char[] escapeUnsafe(int c) { 70 return ("{" + (char) c + "}").toCharArray(); 71 } 72 }; 73 EscaperAsserts.assertBasic(wrappingEscaper); 74 // '[' and '@' lie either side of [A-Z]. 75 assertEquals("{[}FOO{@}BAR{]}", wrappingEscaper.escape("[FOO@BAR]")); 76 } 77 testDeleteUnsafeChars()78 public void testDeleteUnsafeChars() throws IOException { 79 UnicodeEscaper deletingEscaper = 80 new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) { 81 @Override 82 protected char[] escapeUnsafe(int c) { 83 return NO_CHARS; 84 } 85 }; 86 EscaperAsserts.assertBasic(deletingEscaper); 87 assertEquals( 88 "Everything outside the printable ASCII range is deleted.", 89 deletingEscaper.escape( 90 "\tEverything\0 outside the\uD800\uDC00 " 91 + "printable ASCII \uFFFFrange is \u007Fdeleted.\n")); 92 } 93 testReplacementPriority()94 public void testReplacementPriority() throws IOException { 95 UnicodeEscaper replacingEscaper = 96 new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) { 97 private final char[] unknown = new char[] {'?'}; 98 99 @Override 100 protected char[] escapeUnsafe(int c) { 101 return unknown; 102 } 103 }; 104 EscaperAsserts.assertBasic(replacingEscaper); 105 106 // Replacements are applied first regardless of whether the character is in 107 // the safe range or not ('&' is a safe char while '\t' and '\n' are not). 108 assertEquals( 109 "<tab>Fish <and>? Chips?<newline>", replacingEscaper.escape("\tFish &\0 Chips\r\n")); 110 } 111 testCodePointsFromSurrogatePairs()112 public void testCodePointsFromSurrogatePairs() throws IOException { 113 UnicodeEscaper surrogateEscaper = 114 new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) { 115 private final char[] escaped = new char[] {'X'}; 116 117 @Override 118 protected char[] escapeUnsafe(int c) { 119 return escaped; 120 } 121 }; 122 EscaperAsserts.assertBasic(surrogateEscaper); 123 124 // A surrogate pair defining a code point within the safe range. 125 String safeInput = "\uD800\uDC00"; // 0x10000 126 assertEquals(safeInput, surrogateEscaper.escape(safeInput)); 127 128 // A surrogate pair defining a code point outside the safe range (but both 129 // of the surrogate characters lie within the safe range). It is important 130 // not to accidentally treat this as a sequence of safe characters. 131 String unsafeInput = "\uDBFF\uDFFF"; // 0x10FFFF 132 assertEquals("X", surrogateEscaper.escape(unsafeInput)); 133 } 134 } 135