1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.lang3; 18 19 import static org.junit.jupiter.api.Assertions.assertEquals; 20 import static org.junit.jupiter.api.Assertions.assertFalse; 21 import static org.junit.jupiter.api.Assertions.assertNotNull; 22 import static org.junit.jupiter.api.Assertions.assertNull; 23 import static org.junit.jupiter.api.Assertions.assertThrows; 24 import static org.junit.jupiter.api.Assertions.assertTrue; 25 26 import java.io.IOException; 27 import java.io.StringWriter; 28 import java.lang.reflect.Constructor; 29 import java.lang.reflect.Modifier; 30 import java.nio.charset.StandardCharsets; 31 import java.nio.file.Files; 32 import java.nio.file.Paths; 33 34 import org.apache.commons.lang3.text.translate.CharSequenceTranslator; 35 import org.apache.commons.lang3.text.translate.NumericEntityEscaper; 36 import org.junit.jupiter.api.Test; 37 38 /** 39 * Unit tests for {@link StringEscapeUtils}. 40 */ 41 @Deprecated 42 public class StringEscapeUtilsTest extends AbstractLangTest { 43 private static final String FOO = "foo"; 44 45 @Test testConstructor()46 public void testConstructor() { 47 assertNotNull(new StringEscapeUtils()); 48 final Constructor<?>[] cons = StringEscapeUtils.class.getDeclaredConstructors(); 49 assertEquals(1, cons.length); 50 assertTrue(Modifier.isPublic(cons[0].getModifiers())); 51 assertTrue(Modifier.isPublic(StringEscapeUtils.class.getModifiers())); 52 assertFalse(Modifier.isFinal(StringEscapeUtils.class.getModifiers())); 53 } 54 55 @Test testEscapeJava()56 public void testEscapeJava() throws IOException { 57 assertNull(StringEscapeUtils.escapeJava(null)); 58 assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JAVA.translate(null, null)); 59 assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JAVA.translate("", null)); 60 61 assertEscapeJava("empty string", "", ""); 62 assertEscapeJava(FOO, FOO); 63 assertEscapeJava("tab", "\\t", "\t"); 64 assertEscapeJava("backslash", "\\\\", "\\"); 65 assertEscapeJava("single quote should not be escaped", "'", "'"); 66 assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r"); 67 assertEscapeJava("\\u1234", "\u1234"); 68 assertEscapeJava("\\u0234", "\u0234"); 69 assertEscapeJava("\\u00EF", "\u00ef"); 70 assertEscapeJava("\\u0001", "\u0001"); 71 assertEscapeJava("Should use capitalized Unicode hex", "\\uABCD", "\uabcd"); 72 73 assertEscapeJava("He didn't say, \\\"stop!\\\"", 74 "He didn't say, \"stop!\""); 75 assertEscapeJava("non-breaking space", "This space is non-breaking:" + "\\u00A0", 76 "This space is non-breaking:\u00a0"); 77 assertEscapeJava("\\uABCD\\u1234\\u012C", 78 "\uABCD\u1234\u012C"); 79 } 80 81 /** 82 * Tests https://issues.apache.org/jira/browse/LANG-421 83 */ 84 @Test testEscapeJavaWithSlash()85 public void testEscapeJavaWithSlash() { 86 final String input = "String with a slash (/) in it"; 87 88 final String expected = input; 89 final String actual = StringEscapeUtils.escapeJava(input); 90 91 /* 92 * In 2.4 StringEscapeUtils.escapeJava(String) escapes '/' characters, which are not a valid character to escape 93 * in a Java string. 94 */ 95 assertEquals(expected, actual); 96 } 97 assertEscapeJava(final String escaped, final String original)98 private void assertEscapeJava(final String escaped, final String original) throws IOException { 99 assertEscapeJava(null, escaped, original); 100 } 101 assertEscapeJava(String message, final String expected, final String original)102 private void assertEscapeJava(String message, final String expected, final String original) throws IOException { 103 final String converted = StringEscapeUtils.escapeJava(original); 104 message = "escapeJava(String) failed" + (message == null ? "" : (": " + message)); 105 assertEquals(expected, converted, message); 106 107 final StringWriter writer = new StringWriter(); 108 StringEscapeUtils.ESCAPE_JAVA.translate(original, writer); 109 assertEquals(expected, writer.toString()); 110 } 111 112 @Test testUnescapeJava()113 public void testUnescapeJava() throws IOException { 114 assertNull(StringEscapeUtils.unescapeJava(null)); 115 assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JAVA.translate(null, null)); 116 assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JAVA.translate("", null)); 117 assertThrows(RuntimeException.class, () -> StringEscapeUtils.unescapeJava("\\u02-3")); 118 119 assertUnescapeJava("", ""); 120 assertUnescapeJava("test", "test"); 121 assertUnescapeJava("\ntest\b", "\\ntest\\b"); 122 assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b"); 123 assertUnescapeJava("'\foo\teste\r", "\\'\\foo\\teste\\r"); 124 assertUnescapeJava("", "\\"); 125 //foo 126 assertUnescapeJava("lowercase Unicode", "\uABCDx", "\\uabcdx"); 127 assertUnescapeJava("uppercase Unicode", "\uABCDx", "\\uABCDx"); 128 assertUnescapeJava("Unicode as final character", "\uABCD", "\\uabcd"); 129 } 130 assertUnescapeJava(final String unescaped, final String original)131 private void assertUnescapeJava(final String unescaped, final String original) throws IOException { 132 assertUnescapeJava(null, unescaped, original); 133 } 134 assertUnescapeJava(final String message, final String unescaped, final String original)135 private void assertUnescapeJava(final String message, final String unescaped, final String original) throws IOException { 136 final String expected = unescaped; 137 final String actual = StringEscapeUtils.unescapeJava(original); 138 139 assertEquals(expected, actual, 140 "unescape(String) failed" + 141 (message == null ? "" : (": " + message)) + 142 ": expected '" + StringEscapeUtils.escapeJava(expected) + 143 // we escape this so we can see it in the error message 144 "' actual '" + StringEscapeUtils.escapeJava(actual) + "'"); 145 146 final StringWriter writer = new StringWriter(); 147 StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer); 148 assertEquals(unescaped, writer.toString()); 149 150 } 151 152 @Test testEscapeEcmaScript()153 public void testEscapeEcmaScript() { 154 assertNull(StringEscapeUtils.escapeEcmaScript(null)); 155 assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null)); 156 assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null)); 157 158 assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\"")); 159 assertEquals("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';", 160 StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';")); 161 } 162 163 @Test testUnescapeEcmaScript()164 public void testUnescapeEcmaScript() { 165 assertNull(StringEscapeUtils.escapeEcmaScript(null)); 166 assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_ECMASCRIPT.translate(null, null)); 167 assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_ECMASCRIPT.translate("", null)); 168 169 assertEquals("He didn't say, \"stop!\"", StringEscapeUtils.unescapeEcmaScript("He didn\\'t say, \\\"stop!\\\"")); 170 assertEquals("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';", 171 StringEscapeUtils.unescapeEcmaScript("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';")); 172 } 173 174 175 /** HTML and XML */ 176 private static final String[][] HTML_ESCAPES = { 177 {"no escaping", "plain text", "plain text"}, 178 {"no escaping", "plain text", "plain text"}, 179 {"empty string", "", ""}, 180 {"null", null, null}, 181 {"ampersand", "bread & butter", "bread & butter"}, 182 {"quotes", ""bread" & butter", "\"bread\" & butter"}, 183 {"final character only", "greater than >", "greater than >"}, 184 {"first character only", "< less than", "< less than"}, 185 {"apostrophe", "Huntington's chorea", "Huntington's chorea"}, 186 {"languages", "English,Français,\u65E5\u672C\u8A9E (nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"}, 187 {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"}, 188 }; 189 190 @Test testEscapeHtml()191 public void testEscapeHtml() throws IOException { 192 for (final String[] element : HTML_ESCAPES) { 193 final String message = element[0]; 194 final String expected = element[1]; 195 final String original = element[2]; 196 assertEquals(expected, StringEscapeUtils.escapeHtml4(original), message); 197 final StringWriter sw = new StringWriter(); 198 StringEscapeUtils.ESCAPE_HTML4.translate(original, sw); 199 final String actual = original == null ? null : sw.toString(); 200 assertEquals(expected, actual, message); 201 } 202 } 203 204 @Test testUnescapeHtml4()205 public void testUnescapeHtml4() throws IOException { 206 for (final String[] element : HTML_ESCAPES) { 207 final String message = element[0]; 208 final String expected = element[2]; 209 final String original = element[1]; 210 assertEquals(expected, StringEscapeUtils.unescapeHtml4(original), message); 211 212 final StringWriter sw = new StringWriter(); 213 StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw); 214 final String actual = original == null ? null : sw.toString(); 215 assertEquals(expected, actual, message); 216 } 217 // \u00E7 is a cedilla (c with wiggle under) 218 // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly 219 // on some locales 220 assertEquals("Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais"), "funny chars pass through OK"); 221 222 assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World")); 223 assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World")); 224 assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World")); 225 assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World")); 226 } 227 228 @Test testUnescapeHexCharsHtml()229 public void testUnescapeHexCharsHtml() { 230 // Simple easy to grok test 231 assertEquals("\u0080\u009F", StringEscapeUtils.unescapeHtml4("€Ÿ"), "hex number unescape"); 232 assertEquals("\u0080\u009F", StringEscapeUtils.unescapeHtml4("€Ÿ"), "hex number unescape"); 233 // Test all Character values: 234 for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) { 235 final Character c1 = Character.valueOf(i); 236 final Character c2 = Character.valueOf((char) (i+1)); 237 final String expected = c1.toString() + c2.toString(); 238 final String escapedC1 = "&#x" + Integer.toHexString((c1.charValue())) + ";"; 239 final String escapedC2 = "&#x" + Integer.toHexString((c2.charValue())) + ";"; 240 assertEquals(expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2), "hex number unescape index " + (int) i); 241 } 242 } 243 244 @Test testUnescapeUnknownEntity()245 public void testUnescapeUnknownEntity() { 246 assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;")); 247 } 248 249 @Test testEscapeHtmlVersions()250 public void testEscapeHtmlVersions() { 251 assertEquals("Β", StringEscapeUtils.escapeHtml4("\u0392")); 252 assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("Β")); 253 254 // TODO: refine API for escaping/unescaping specific HTML versions 255 } 256 257 @Test testEscapeXml()258 public void testEscapeXml() throws Exception { 259 assertEquals("<abc>", StringEscapeUtils.escapeXml("<abc>")); 260 assertEquals("<abc>", StringEscapeUtils.unescapeXml("<abc>")); 261 262 assertEquals("\u00A1", StringEscapeUtils.escapeXml("\u00A1"), "XML should not escape >0x7f values"); 263 assertEquals("\u00A0", StringEscapeUtils.unescapeXml(" "), "XML should be able to unescape >0x7f values"); 264 assertEquals("\u00A0", StringEscapeUtils.unescapeXml(" "), 265 "XML should be able to unescape >0x7f values with one leading 0"); 266 assertEquals("\u00A0", StringEscapeUtils.unescapeXml(" "), 267 "XML should be able to unescape >0x7f values with two leading 0s"); 268 assertEquals("\u00A0", StringEscapeUtils.unescapeXml(" "), 269 "XML should be able to unescape >0x7f values with three leading 0s"); 270 271 assertEquals("ain't", StringEscapeUtils.unescapeXml("ain't")); 272 assertEquals("ain't", StringEscapeUtils.escapeXml("ain't")); 273 assertEquals("", StringEscapeUtils.escapeXml("")); 274 assertNull(StringEscapeUtils.escapeXml(null)); 275 assertNull(StringEscapeUtils.unescapeXml(null)); 276 277 StringWriter sw = new StringWriter(); 278 StringEscapeUtils.ESCAPE_XML.translate("<abc>", sw); 279 assertEquals("<abc>", sw.toString(), "XML was escaped incorrectly"); 280 281 sw = new StringWriter(); 282 StringEscapeUtils.UNESCAPE_XML.translate("<abc>", sw); 283 assertEquals("<abc>", sw.toString(), "XML was unescaped incorrectly"); 284 } 285 286 @Test testEscapeXml10()287 public void testEscapeXml10() { 288 assertEquals("a<b>c"d'e&f", StringEscapeUtils.escapeXml10("a<b>c\"d'e&f")); 289 assertEquals("a\tb\rc\nd", StringEscapeUtils.escapeXml10("a\tb\rc\nd"), "XML 1.0 should not escape \t \n \r"); 290 assertEquals("ab", StringEscapeUtils.escapeXml10("a\u0000\u0001\u0008\u000b\u000c\u000e\u001fb"), 291 "XML 1.0 should omit most #x0-x8 | #xb | #xc | #xe-#x19"); 292 assertEquals("a\ud7ff \ue000b", StringEscapeUtils.escapeXml10("a\ud7ff\ud800 \udfff \ue000b"), 293 "XML 1.0 should omit #xd800-#xdfff"); 294 assertEquals("a\ufffdb", StringEscapeUtils.escapeXml10("a\ufffd\ufffe\uffffb"), 295 "XML 1.0 should omit #xfffe | #xffff"); 296 assertEquals("a\u007e„\u0085†Ÿ\u00a0b", 297 StringEscapeUtils.escapeXml10("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"), 298 "XML 1.0 should escape #x7f-#x84 | #x86 - #x9f, for XML 1.1 compatibility"); 299 } 300 301 @Test testEscapeXml11()302 public void testEscapeXml11() { 303 assertEquals("a<b>c"d'e&f", StringEscapeUtils.escapeXml11("a<b>c\"d'e&f")); 304 assertEquals("a\tb\rc\nd", StringEscapeUtils.escapeXml11("a\tb\rc\nd"), "XML 1.1 should not escape \t \n \r"); 305 assertEquals("ab", StringEscapeUtils.escapeXml11("a\u0000b"), "XML 1.1 should omit #x0"); 306 assertEquals("ab", 307 StringEscapeUtils.escapeXml11("a\u0001\u0008\u000b\u000c\u000e\u001fb"), 308 "XML 1.1 should escape #x1-x8 | #xb | #xc | #xe-#x19"); 309 assertEquals("a\u007e„\u0085†Ÿ\u00a0b", 310 StringEscapeUtils.escapeXml11("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"), 311 "XML 1.1 should escape #x7F-#x84 | #x86-#x9F"); 312 assertEquals("a\ud7ff \ue000b", StringEscapeUtils.escapeXml11("a\ud7ff\ud800 \udfff \ue000b"), 313 "XML 1.1 should omit #xd800-#xdfff"); 314 assertEquals("a\ufffdb", StringEscapeUtils.escapeXml11("a\ufffd\ufffe\uffffb"), 315 "XML 1.1 should omit #xfffe | #xffff"); 316 } 317 318 /** 319 * Tests Supplementary characters. 320 * <p> 321 * From https://www.w3.org/International/questions/qa-escapes 322 * </p> 323 * <blockquote> 324 * Supplementary characters are those Unicode characters that have code points higher than the characters in 325 * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the 326 * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect 327 * - you must use the single, code point value for that character. For example, use &#x233B4; rather than 328 * &#xD84C;&#xDFB4;. 329 * </blockquote> 330 * @see <a href="https://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a> 331 * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a> 332 */ 333 @Test testEscapeXmlSupplementaryCharacters()334 public void testEscapeXmlSupplementaryCharacters() { 335 final CharSequenceTranslator escapeXml = 336 StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) ); 337 338 assertEquals("𣎴", escapeXml.translate("\uD84C\uDFB4"), 339 "Supplementary character must be represented using a single escape"); 340 341 assertEquals("a b c 𣎴", escapeXml.translate("a b c \uD84C\uDFB4"), 342 "Supplementary characters mixed with basic characters should be encoded correctly"); 343 } 344 345 @Test testEscapeXmlAllCharacters()346 public void testEscapeXmlAllCharacters() { 347 // https://www.w3.org/TR/xml/#charsets says: 348 // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, 349 // excluding the surrogate blocks, FFFE, and FFFF. */ 350 final CharSequenceTranslator escapeXml = StringEscapeUtils.ESCAPE_XML 351 .with(NumericEntityEscaper.below(9), NumericEntityEscaper.between(0xB, 0xC), NumericEntityEscaper.between(0xE, 0x19), 352 NumericEntityEscaper.between(0xD800, 0xDFFF), NumericEntityEscaper.between(0xFFFE, 0xFFFF), NumericEntityEscaper.above(0x110000)); 353 354 assertEquals("�", escapeXml.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008")); 355 assertEquals("\t", escapeXml.translate("\t")); // 0x9 356 assertEquals("\n", escapeXml.translate("\n")); // 0xA 357 assertEquals("", escapeXml.translate("\u000B\u000C")); 358 assertEquals("\r", escapeXml.translate("\r")); // 0xD 359 assertEquals("Hello World! Ain't this great?", escapeXml.translate("Hello World! Ain't this great?")); 360 assertEquals("", escapeXml.translate("\u000E\u000F\u0018\u0019")); 361 } 362 363 /** 364 * Reverse of the above. 365 * 366 * @see <a href="https://issues.apache.org/jira/browse/LANG-729">LANG-729</a> 367 */ 368 @Test testUnescapeXmlSupplementaryCharacters()369 public void testUnescapeXmlSupplementaryCharacters() { 370 assertEquals("\uD84C\uDFB4", StringEscapeUtils.unescapeXml("𣎴"), 371 "Supplementary character must be represented using a single escape"); 372 373 assertEquals("a b c \uD84C\uDFB4", StringEscapeUtils.unescapeXml("a b c 𣎴"), 374 "Supplementary characters mixed with basic characters should be decoded correctly"); 375 } 376 377 // Tests issue LANG-150 378 // https://issues.apache.org/jira/browse/LANG-150 379 @Test testStandaloneAmphersand()380 public void testStandaloneAmphersand() { 381 assertEquals("<P&O>", StringEscapeUtils.unescapeHtml4("<P&O>")); 382 assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & <")); 383 assertEquals("<P&O>", StringEscapeUtils.unescapeXml("<P&O>")); 384 assertEquals("test & <", StringEscapeUtils.unescapeXml("test & <")); 385 } 386 387 @Test testLang313()388 public void testLang313() { 389 assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &")); 390 } 391 392 @Test testEscapeCsvString()393 public void testEscapeCsvString() { 394 assertEquals("foo.bar", StringEscapeUtils.escapeCsv("foo.bar")); 395 assertEquals("\"foo,bar\"", StringEscapeUtils.escapeCsv("foo,bar")); 396 assertEquals("\"foo\nbar\"", StringEscapeUtils.escapeCsv("foo\nbar")); 397 assertEquals("\"foo\rbar\"", StringEscapeUtils.escapeCsv("foo\rbar")); 398 assertEquals("\"foo\"\"bar\"", StringEscapeUtils.escapeCsv("foo\"bar")); 399 assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar")); 400 assertEquals("", StringEscapeUtils.escapeCsv("")); 401 assertNull(StringEscapeUtils.escapeCsv(null)); 402 } 403 404 @Test testEscapeCsvWriter()405 public void testEscapeCsvWriter() throws Exception { 406 checkCsvEscapeWriter("foo.bar", "foo.bar"); 407 checkCsvEscapeWriter("\"foo,bar\"", "foo,bar"); 408 checkCsvEscapeWriter("\"foo\nbar\"", "foo\nbar"); 409 checkCsvEscapeWriter("\"foo\rbar\"", "foo\rbar"); 410 checkCsvEscapeWriter("\"foo\"\"bar\"", "foo\"bar"); 411 checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar"); 412 checkCsvEscapeWriter("", null); 413 checkCsvEscapeWriter("", ""); 414 } 415 checkCsvEscapeWriter(final String expected, final String value)416 private void checkCsvEscapeWriter(final String expected, final String value) throws IOException { 417 final StringWriter writer = new StringWriter(); 418 StringEscapeUtils.ESCAPE_CSV.translate(value, writer); 419 assertEquals(expected, writer.toString()); 420 } 421 422 @Test testEscapeCsvIllegalStateException()423 public void testEscapeCsvIllegalStateException() { 424 final StringWriter writer = new StringWriter(); 425 assertThrows(IllegalStateException.class, () -> StringEscapeUtils.ESCAPE_CSV.translate("foo", -1, writer)); 426 } 427 428 @Test testUnescapeCsvString()429 public void testUnescapeCsvString() { 430 assertEquals("foo.bar", StringEscapeUtils.unescapeCsv("foo.bar")); 431 assertEquals("foo,bar", StringEscapeUtils.unescapeCsv("\"foo,bar\"")); 432 assertEquals("foo\nbar", StringEscapeUtils.unescapeCsv("\"foo\nbar\"")); 433 assertEquals("foo\rbar", StringEscapeUtils.unescapeCsv("\"foo\rbar\"")); 434 assertEquals("foo\"bar", StringEscapeUtils.unescapeCsv("\"foo\"\"bar\"")); 435 assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar")); 436 assertEquals("", StringEscapeUtils.unescapeCsv("")); 437 assertNull(StringEscapeUtils.unescapeCsv(null)); 438 439 assertEquals("\"foo.bar\"", StringEscapeUtils.unescapeCsv("\"foo.bar\"")); 440 } 441 442 @Test testUnescapeCsvWriter()443 public void testUnescapeCsvWriter() throws Exception { 444 checkCsvUnescapeWriter("foo.bar", "foo.bar"); 445 checkCsvUnescapeWriter("foo,bar", "\"foo,bar\""); 446 checkCsvUnescapeWriter("foo\nbar", "\"foo\nbar\""); 447 checkCsvUnescapeWriter("foo\rbar", "\"foo\rbar\""); 448 checkCsvUnescapeWriter("foo\"bar", "\"foo\"\"bar\""); 449 checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar"); 450 checkCsvUnescapeWriter("", null); 451 checkCsvUnescapeWriter("", ""); 452 453 checkCsvUnescapeWriter("\"foo.bar\"", "\"foo.bar\""); 454 } 455 checkCsvUnescapeWriter(final String expected, final String value)456 private void checkCsvUnescapeWriter(final String expected, final String value) throws IOException { 457 final StringWriter writer = new StringWriter(); 458 StringEscapeUtils.UNESCAPE_CSV.translate(value, writer); 459 assertEquals(expected, writer.toString()); 460 } 461 462 @Test testUnescapeCsvIllegalStateException()463 public void testUnescapeCsvIllegalStateException() { 464 final StringWriter writer = new StringWriter(); 465 assertThrows(IllegalStateException.class, () -> StringEscapeUtils.UNESCAPE_CSV.translate("foo", -1, writer)); 466 } 467 468 /** 469 * Tests // https://issues.apache.org/jira/browse/LANG-480 470 */ 471 @Test testEscapeHtmlHighUnicode()472 public void testEscapeHtmlHighUnicode() { 473 // this is the utf8 representation of the character: 474 // COUNTING ROD UNIT DIGIT THREE 475 // in Unicode 476 // code point: U+1D362 477 final byte[] data = { (byte) 0xF0, (byte) 0x9D, (byte) 0x8D, (byte) 0xA2 }; 478 479 final String original = new String(data, StandardCharsets.UTF_8); 480 481 final String escaped = StringEscapeUtils.escapeHtml4( original ); 482 assertEquals(original, escaped, "High Unicode should not have been escaped"); 483 484 final String unescaped = StringEscapeUtils.unescapeHtml4( escaped ); 485 assertEquals(original, unescaped, "High Unicode should have been unchanged"); 486 487 // TODO: I think this should hold, needs further investigation 488 // String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "𝍢" ); 489 // assertEquals( "High Unicode should have been unescaped", original, unescapedFromEntity); 490 } 491 492 /** 493 * Tests https://issues.apache.org/jira/browse/LANG-339 494 */ 495 @Test testEscapeHiragana()496 public void testEscapeHiragana() { 497 // Some random Japanese Unicode characters 498 final String original = "\u304B\u304C\u3068"; 499 final String escaped = StringEscapeUtils.escapeHtml4(original); 500 assertEquals(original, escaped, 501 "Hiragana character Unicode behavior should not be being escaped by escapeHtml4"); 502 503 final String unescaped = StringEscapeUtils.unescapeHtml4( escaped ); 504 505 assertEquals(escaped, unescaped, "Hiragana character Unicode behavior has changed - expected no unescaping"); 506 } 507 508 /** 509 * Tests https://issues.apache.org/jira/browse/LANG-708 510 * 511 * @throws IOException 512 * if an I/O error occurs 513 */ 514 @Test testLang708()515 public void testLang708() throws IOException { 516 final byte[] inputBytes = Files.readAllBytes(Paths.get("src/test/resources/lang-708-input.txt")); 517 final String input = new String(inputBytes, StandardCharsets.UTF_8); 518 final String escaped = StringEscapeUtils.escapeEcmaScript(input); 519 // just the end: 520 assertTrue(escaped.endsWith("}]"), escaped); 521 // a little more: 522 assertTrue(escaped.endsWith("\"valueCode\\\":\\\"\\\"}]"), escaped); 523 } 524 525 /** 526 * Tests https://issues.apache.org/jira/browse/LANG-720 527 */ 528 @Test testLang720()529 public void testLang720() { 530 final String input = "\ud842\udfb7" + "A"; 531 final String escaped = StringEscapeUtils.escapeXml(input); 532 assertEquals(input, escaped); 533 } 534 535 /** 536 * Tests https://issues.apache.org/jira/browse/LANG-911 537 */ 538 @Test testLang911()539 public void testLang911() { 540 final String bellsTest = "\ud83d\udc80\ud83d\udd14"; 541 final String value = StringEscapeUtils.escapeJava(bellsTest); 542 final String valueTest = StringEscapeUtils.unescapeJava(value); 543 assertEquals(bellsTest, valueTest); 544 } 545 546 @Test testEscapeJson()547 public void testEscapeJson() { 548 assertNull(StringEscapeUtils.escapeJson(null)); 549 assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JSON.translate(null, null)); 550 assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JSON.translate("", null)); 551 552 assertEquals("He didn't say, \\\"stop!\\\"", StringEscapeUtils.escapeJson("He didn't say, \"stop!\"")); 553 554 final String expected = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/"; 555 final String input ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/"; 556 557 assertEquals(expected, StringEscapeUtils.escapeJson(input)); 558 } 559 560 @Test testUnescapeJson()561 public void testUnescapeJson() { 562 assertNull(StringEscapeUtils.unescapeJson(null)); 563 assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JSON.translate(null, null)); 564 assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JSON.translate("", null)); 565 566 assertEquals("He didn't say, \"stop!\"", StringEscapeUtils.unescapeJson("He didn't say, \\\"stop!\\\"")); 567 568 final String expected ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/"; 569 final String input = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/"; 570 571 assertEquals(expected, StringEscapeUtils.unescapeJson(input)); 572 } 573 } 574