1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * This is a series of unit tests for snippet creation and highlighting 19 * 20 * You can run this entire test case with: 21 * runtest -c com.android.emailcommon.utility.TextUtilitiesTests email 22 */ 23 package com.android.emailcommon.utility; 24 25 import android.test.AndroidTestCase; 26 import android.text.SpannableStringBuilder; 27 import android.text.style.BackgroundColorSpan; 28 29 public class TextUtilitiesTests extends AndroidTestCase { 30 testPlainSnippet()31 public void testPlainSnippet() { 32 // Test the simplest cases 33 assertEquals("", TextUtilities.makeSnippetFromPlainText(null)); 34 assertEquals("", TextUtilities.makeSnippetFromPlainText("")); 35 36 // Test handling leading, trailing, and duplicated whitespace 37 // Just test common whitespace characters; we calls Character.isWhitespace() internally, so 38 // other whitespace should be fine as well 39 assertEquals("", TextUtilities.makeSnippetFromPlainText(" \n\r\t\r\t\n")); 40 char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER; 41 assertEquals("foo", TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \n\t\r" + c)); 42 assertEquals("foo bar", 43 TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \r\n bar\n\t\r" + c)); 44 45 // Handle duplicated - and = 46 assertEquals("Foo-Bar=Bletch", 47 TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch")); 48 49 // We shouldn't muck with HTML entities 50 assertEquals(" >", TextUtilities.makeSnippetFromPlainText(" >")); 51 } 52 testHtmlSnippet()53 public void testHtmlSnippet() { 54 // Test the simplest cases 55 assertEquals("", TextUtilities.makeSnippetFromHtmlText(null)); 56 assertEquals("", TextUtilities.makeSnippetFromHtmlText("")); 57 58 // Test handling leading, trailing, and duplicated whitespace 59 // Just test common whitespace characters; we calls Character.isWhitespace() internally, so 60 // other whitespace should be fine as well 61 assertEquals("", TextUtilities.makeSnippetFromHtmlText(" \n\r\t\r\t\n")); 62 char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER; 63 assertEquals("foo", TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \n\t\r" + c)); 64 assertEquals("foo bar", 65 TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \r\n bar\n\t\r" + c)); 66 67 // Handle duplicated - and = 68 assertEquals("Foo-Bar=Bletch", 69 TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch")); 70 71 // We should catch HTML entities in these tests 72 assertEquals(">", TextUtilities.makeSnippetFromHtmlText(" >")); 73 assertEquals("&<> \"", TextUtilities.makeSnippetFromHtmlText("&<> "")); 74 // Test for decimal and hex entities 75 assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("ABC")); 76 assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("ABC")); 77 78 // Test for stripping simple tags 79 assertEquals("Hi there", TextUtilities.makeSnippetFromHtmlText("<html>Hi there</html>")); 80 // TODO: Add tests here if/when we find problematic HTML 81 } 82 testStripHtmlEntityEdgeCases()83 public void testStripHtmlEntityEdgeCases() { 84 int[] skipCount = new int[1]; 85 // Bare & isn't an entity 86 char c = TextUtilities.stripHtmlEntity("&", 0, skipCount); 87 assertEquals(c, '&'); 88 assertEquals(0, skipCount[0]); 89 // Also not legal 90 c = TextUtilities.stripHtmlEntity("&;", 0, skipCount); 91 assertEquals(c, '&'); 92 assertEquals(0, skipCount[0]); 93 // This is an entity, but shouldn't be found 94 c = TextUtilities.stripHtmlEntity("&nosuch;", 0, skipCount); 95 assertEquals(c, '&'); 96 assertEquals(0, skipCount[0]); 97 // This is too long for an entity, even though it starts like a valid one 98 c = TextUtilities.stripHtmlEntity(" andmore;", 0, skipCount); 99 assertEquals(c, '&'); 100 assertEquals(0, skipCount[0]); 101 // Illegal decimal entities 102 c = TextUtilities.stripHtmlEntity("&#ABC", 0, skipCount); 103 assertEquals(c, '&'); 104 assertEquals(0, skipCount[0]); 105 c = TextUtilities.stripHtmlEntity("B", 0, skipCount); 106 assertEquals(c, '&'); 107 assertEquals(0, skipCount[0]); 108 // Illegal hex entities 109 c = TextUtilities.stripHtmlEntity("઼", 0, skipCount); 110 assertEquals(c, '&'); 111 assertEquals(0, skipCount[0]); 112 // Illegal hex entities 113 c = TextUtilities.stripHtmlEntity("G", 0, skipCount); 114 assertEquals(c, '&'); 115 assertEquals(0, skipCount[0]); 116 } 117 testStripContent()118 public void testStripContent() { 119 assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText( 120 "<html><style foo=\"bar\">Not</style>Visible</html>")); 121 assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText( 122 "<html><STYLE foo=\"bar\">Not</STYLE>Visible</html>")); 123 assertEquals("IsVisible", TextUtilities.makeSnippetFromHtmlText( 124 "<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>")); 125 assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText( 126 "<html>Visible<style foo=\"bar\">Not")); 127 assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText( 128 "<html>Visible<style foo=\"bar\">Not</style>AgainVisible")); 129 assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText( 130 "<html>Visible<style foo=\"bar\"/>AgainVisible")); 131 assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText( 132 "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible")); 133 } 134 135 /** 136 * We pass in HTML text in which an ampersand (@) is two chars ahead of the correct end position 137 * for the tag named 'tag' and then check whether the calculated end position matches the known 138 * correct position. HTML text not containing an ampersand should generate a calculated end of 139 * -1 140 * @param text the HTML text to test 141 */ findTagEnd(String text, String tag)142 private void findTagEnd(String text, String tag) { 143 int calculatedEnd = TextUtilities.findTagEnd(text , tag, 0); 144 int knownEnd = text.indexOf('@') + 2; 145 if (knownEnd == 1) { 146 // indexOf will return -1, so we'll get 1 as knownEnd 147 assertEquals(-1, calculatedEnd); 148 } else { 149 assertEquals(calculatedEnd, knownEnd); 150 } 151 } 152 testFindTagEnd()153 public void testFindTagEnd() { 154 // Test with <tag ... /> 155 findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag"); 156 // Test with <tag ...> ... </tag> 157 findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag"); 158 // Test with incomplete tag 159 findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag"); 160 // Test with space at end of tag 161 findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag "); 162 } 163 assertHighlightUnchanged(String str)164 private void assertHighlightUnchanged(String str) { 165 assertEquals(str, TextUtilities.highlightTermsInHtml(str, null)); 166 } 167 testHighlightNoTerm()168 public void testHighlightNoTerm() { 169 // With no search terms, the html should be unchanged 170 assertHighlightUnchanged("<html><style foo=\"bar\">Not</style>Visible</html>"); 171 assertHighlightUnchanged("<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"); 172 assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not"); 173 assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not</style>AgainVisible"); 174 assertHighlightUnchanged("<html>Visible<style foo=\"bar\"/>AgainVisible"); 175 assertHighlightUnchanged( 176 "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible"); 177 } 178 testHighlightSingleTermHtml()179 public void testHighlightSingleTermHtml() { 180 String str = "<html><style foo=\"bar\">Not</style>Visible</html>"; 181 // Test that tags aren't highlighted 182 assertEquals(str, TextUtilities.highlightTermsInHtml( 183 "<html><style foo=\"bar\">Not</style>Visible</html>", "style")); 184 // Test that non-tags are 185 assertEquals("<html><style foo=\"bar\">Not</style><span " + 186 "style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING + 187 "\">Visi</span>ble</html>", 188 TextUtilities.highlightTermsInHtml(str, "Visi")); 189 assertEquals("<html>Visible<style foo=\"bar\">Not</style>A<span" + 190 " style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING + 191 "\">gain</span>Visible", 192 TextUtilities.highlightTermsInHtml( 193 "<html>Visible<style foo=\"bar\">Not</style>AgainVisible", "gain")); 194 } 195 brokentestHighlightSingleTermText()196 public void brokentestHighlightSingleTermText() { 197 // Sprinkle text with a few HTML characters to make sure they're ignored 198 String text = "This< should be visibl>e"; 199 // We should find this, because search terms are case insensitive 200 SpannableStringBuilder ssb = 201 (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Visi"); 202 BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 203 assertEquals(1, spans.length); 204 BackgroundColorSpan span = spans[0]; 205 assertEquals(text.indexOf("visi"), ssb.getSpanStart(span)); 206 assertEquals(text.indexOf("bl>e"), ssb.getSpanEnd(span)); 207 // Heh; this next test fails.. we use the search term! 208 assertEquals(text, ssb.toString()); 209 210 // Multiple instances of the term 211 text = "The research word should be a search result"; 212 ssb = (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Search"); 213 spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 214 assertEquals(2, spans.length); 215 span = spans[0]; 216 assertEquals(text.indexOf("search word"), ssb.getSpanStart(span)); 217 assertEquals(text.indexOf(" word"), ssb.getSpanEnd(span)); 218 span = spans[1]; 219 assertEquals(text.indexOf("search result"), ssb.getSpanStart(span)); 220 assertEquals(text.indexOf(" result"), ssb.getSpanEnd(span)); 221 assertEquals(text, ssb.toString()); 222 } 223 brokentestHighlightTwoTermText()224 public void brokentestHighlightTwoTermText() { 225 String text = "This should be visible"; 226 // We should find this, because search terms are case insensitive 227 SpannableStringBuilder ssb = 228 (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "visi should"); 229 BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 230 assertEquals(2, spans.length); 231 BackgroundColorSpan span = spans[0]; 232 assertEquals(text.indexOf("should"), ssb.getSpanStart(span)); 233 assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span)); 234 span = spans[1]; 235 assertEquals(text.indexOf("visi"), ssb.getSpanStart(span)); 236 assertEquals(text.indexOf("ble"), ssb.getSpanEnd(span)); 237 assertEquals(text, ssb.toString()); 238 } 239 brokentestHighlightDuplicateTermText()240 public void brokentestHighlightDuplicateTermText() { 241 String text = "This should be visible"; 242 // We should find this, because search terms are case insensitive 243 SpannableStringBuilder ssb = 244 (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should should"); 245 BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 246 assertEquals(1, spans.length); 247 BackgroundColorSpan span = spans[0]; 248 assertEquals(text.indexOf("should"), ssb.getSpanStart(span)); 249 assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span)); 250 } 251 brokentestHighlightOverlapTermText()252 public void brokentestHighlightOverlapTermText() { 253 String text = "This shoulder is visible"; 254 // We should find this, because search terms are case insensitive 255 SpannableStringBuilder ssb = 256 (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should ould"); 257 BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 258 assertEquals(1, spans.length); 259 BackgroundColorSpan span = spans[0]; 260 assertEquals(text.indexOf("should"), ssb.getSpanStart(span)); 261 assertEquals(text.indexOf("er is"), ssb.getSpanEnd(span)); 262 } 263 264 brokentestHighlightOverlapTermText2()265 public void brokentestHighlightOverlapTermText2() { 266 String text = "The shoulders are visible"; 267 // We should find this, because search terms are case insensitive 268 SpannableStringBuilder ssb = 269 (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "shoulder shoulders"); 270 BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 271 assertEquals(2, spans.length); 272 BackgroundColorSpan span = spans[0]; 273 assertEquals(text.indexOf("shoulder"), ssb.getSpanStart(span)); 274 assertEquals(text.indexOf("s are visible"), ssb.getSpanEnd(span)); 275 span = spans[1]; 276 // Just the 's' should be caught in the 2nd span 277 assertEquals(text.indexOf("s are visible"), ssb.getSpanStart(span)); 278 assertEquals(text.indexOf(" are visible"), ssb.getSpanEnd(span)); 279 assertEquals(text, ssb.toString()); 280 } 281 // For debugging large HTML samples 282 283 // private String readLargeSnippet(String fn) { 284 // File file = mContext.getFileStreamPath(fn); 285 // StringBuffer sb = new StringBuffer(); 286 // BufferedReader reader = null; 287 // try { 288 // String text; 289 // reader = new BufferedReader(new FileReader(file)); 290 // while ((text = reader.readLine()) != null) { 291 // sb.append(text); 292 // sb.append(" "); 293 // } 294 // } catch (IOException e) { 295 // } 296 // return sb.toString(); 297 // } 298 } 299