1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.tools.lint.checks; 18 19 import static com.android.tools.lint.detector.api.LintConstants.TAG_STRING; 20 import static com.android.tools.lint.detector.api.LintConstants.TAG_STRING_ARRAY; 21 22 import com.android.annotations.VisibleForTesting; 23 import com.android.resources.ResourceFolderType; 24 import com.android.tools.lint.detector.api.Category; 25 import com.android.tools.lint.detector.api.Context; 26 import com.android.tools.lint.detector.api.Issue; 27 import com.android.tools.lint.detector.api.ResourceXmlDetector; 28 import com.android.tools.lint.detector.api.Scope; 29 import com.android.tools.lint.detector.api.Severity; 30 import com.android.tools.lint.detector.api.Speed; 31 import com.android.tools.lint.detector.api.XmlContext; 32 33 import org.w3c.dom.Element; 34 import org.w3c.dom.Node; 35 import org.w3c.dom.NodeList; 36 37 import java.util.ArrayList; 38 import java.util.Arrays; 39 import java.util.Collection; 40 import java.util.List; 41 import java.util.regex.Matcher; 42 import java.util.regex.Pattern; 43 44 /** 45 * Checks for various typographical issues in string definitions. 46 */ 47 public class TypographyDetector extends ResourceXmlDetector { 48 /** Replace hyphens with dashes? */ 49 public static final Issue DASHES = Issue.create( 50 "TypographyDashes", //$NON-NLS-1$ 51 "Looks for usages of hyphens which can be replaced by n dash and m dash characters", 52 "The \"n dash\" (\u2013, –) and the \"m dash\" (\u2014, —) " + 53 "characters are used for ranges (n dash) and breaks (m dash). Using these " + 54 "instead of plain hyphens can make text easier to read and your application " + 55 "will look more polished.", 56 Category.TYPOGRAPHY, 57 5, 58 Severity.WARNING, 59 TypographyDetector.class, 60 Scope.RESOURCE_FILE_SCOPE). 61 setMoreInfo("http://en.wikipedia.org/wiki/Dash"); //$NON-NLS-1$ 62 63 /** Replace dumb quotes with smart quotes? */ 64 public static final Issue QUOTES = Issue.create( 65 "TypographyQuotes", //$NON-NLS-1$ 66 "Looks for straight quotes which can be replaced by curvy quotes", 67 "Straight single quotes and double quotes, when used as a pair, can be replaced " + 68 "by \"curvy quotes\" (or directional quotes). This can make the text more " + 69 "readable.\n" + 70 "\n" + 71 "Note that you should never use grave accents and apostrophes to quote, " + 72 "`like this'.\n" + 73 "\n" + 74 "(Also note that you should not use curvy quotes for code fragments.)", 75 Category.TYPOGRAPHY, 76 5, 77 Severity.WARNING, 78 TypographyDetector.class, 79 Scope.RESOURCE_FILE_SCOPE). 80 setMoreInfo("http://en.wikipedia.org/wiki/Quotation_mark"). //$NON-NLS-1$ 81 // This feature is apparently controversial: recent apps have started using 82 // straight quotes to avoid inconsistencies. Disabled by default for now. 83 setEnabledByDefault(false); 84 85 /** Replace fraction strings with fraction characters? */ 86 public static final Issue FRACTIONS = Issue.create( 87 "TypographyFractions", //$NON-NLS-1$ 88 "Looks for fraction strings which can be replaced with a fraction character", 89 "You can replace certain strings, such as 1/2, and 1/4, with dedicated " + 90 "characters for these, such as \u00BD (½) and \00BC (¼). " + 91 "This can help make the text more readable.", 92 Category.TYPOGRAPHY, 93 5, 94 Severity.WARNING, 95 TypographyDetector.class, 96 Scope.RESOURCE_FILE_SCOPE). 97 setMoreInfo("http://en.wikipedia.org/wiki/Number_Forms"); //$NON-NLS-1$ 98 99 /** Replace ... with the ellipsis character? */ 100 public static final Issue ELLIPSIS = Issue.create( 101 "TypographyEllipsis", //$NON-NLS-1$ 102 "Looks for ellipsis strings (...) which can be replaced with an ellipsis character", 103 "You can replace the string \"...\" with a dedicated ellipsis character, " + 104 "ellipsis character (\u2026, …). This can help make the text more readable.", 105 Category.TYPOGRAPHY, 106 5, 107 Severity.WARNING, 108 TypographyDetector.class, 109 Scope.RESOURCE_FILE_SCOPE). 110 setMoreInfo("http://en.wikipedia.org/wiki/Ellipsis"); //$NON-NLS-1$ 111 112 /** The main issue discovered by this detector */ 113 public static final Issue OTHER = Issue.create( 114 "TypographyOther", //$NON-NLS-1$ 115 "Looks for miscellaneous typographical problems like replacing (c) with \u00A9", 116 "This check looks for miscellaneous typographical problems and offers replacement " + 117 "sequences that will make the text easier to read and your application more " + 118 "polished.", 119 Category.TYPOGRAPHY, 120 3, 121 Severity.WARNING, 122 TypographyDetector.class, 123 Scope.RESOURCE_FILE_SCOPE); 124 125 private static final String GRAVE_QUOTE_MESSAGE = 126 "Avoid quoting with grave accents; use apostrophes or better yet directional quotes instead"; 127 private static final String ELLIPSIS_MESSAGE = 128 "Replace \"...\" with ellipsis character (\u2026, …) ?"; 129 private static final String EN_DASH_MESSAGE = 130 "Replace \"-\" with an \"en dash\" character (\u2013, –) ?"; 131 private static final String EM_DASH_MESSAGE = 132 "Replace \"--\" with an \"em dash\" character (\u2014, —) ?"; 133 private static final String TYPOGRAPHIC_APOSTROPHE_MESSAGE = 134 "Replace apostrophe (') with typographic apostrophe (\u2019, ’) ?"; 135 private static final String SINGLE_QUOTE_MESSAGE = 136 "Replace straight quotes ('') with directional quotes (\u2018\u2019, ‘ and ’) ?"; 137 private static final String DBL_QUOTES_MESSAGE = 138 "Replace straight quotes (\") with directional quotes (\u201C\u201D, “ and ”) ?"; 139 private static final String COPYRIGHT_MESSAGE = 140 "Replace (c) with copyright symbol \u00A9 (©) ?"; 141 142 /** 143 * Pattern used to detect scenarios which can be replaced with n dashes: a 144 * numeric range with a hyphen in the middle (and possibly spaces) 145 */ 146 @VisibleForTesting 147 static final Pattern HYPHEN_RANGE_PATTERN = 148 Pattern.compile(".*(\\d+\\s*)-(\\s*\\d+).*"); //$NON-NLS-1$ 149 150 /** 151 * Pattern used to detect scenarios where a grave accent mark is used 152 * to do ASCII quotations of the form `this'' or ``this'', which is frowned upon. 153 * This pattern tries to avoid falsely complaining about strings like 154 * "Type Option-` then 'Escape'." 155 */ 156 @VisibleForTesting 157 static final Pattern GRAVE_QUOTATION = 158 Pattern.compile("(^[^`]*`[^'`]+'[^']*$)|(^[^`]*``[^'`]+''[^']*$)"); //$NON-NLS-1$ 159 160 /** 161 * Pattern used to detect common fractions, e.g. 1/2, 1/3, 2/3, 1/4, 3/4 and 162 * variations like 2 / 3, but not 11/22 and so on. 163 */ 164 @VisibleForTesting 165 static final Pattern FRACTION_PATTERN = 166 Pattern.compile(".*\\b([13])\\s*/\\s*([234])\\b.*"); //$NON-NLS-1$ 167 168 /** 169 * Pattern used to detect single quote strings, such as 'hello', but 170 * not just quoted strings like 'Double quote: "', and not sentences 171 * where there are multiple apostrophes but not in a quoting context such 172 * as "Mind Your P's and Q's". 173 */ 174 @VisibleForTesting 175 static final Pattern SINGLE_QUOTE = 176 Pattern.compile(".*\\W*'[^']+'(\\W.*)?"); //$NON-NLS-1$ 177 178 private static final String FRACTION_MESSAGE = 179 "Use fraction character %1$c (%2$s) instead of %3$s ?"; 180 181 private static final String FRACTION_MESSAGE_PATTERN = 182 "Use fraction character (.+) \\((.+)\\) instead of (.+) \\?"; 183 184 private boolean mCheckDashes; 185 private boolean mCheckQuotes; 186 private boolean mCheckFractions; 187 private boolean mCheckEllipsis; 188 private boolean mCheckMisc; 189 190 /** Constructs a new {@link TypographyDetector} */ TypographyDetector()191 public TypographyDetector() { 192 } 193 194 @Override appliesTo(ResourceFolderType folderType)195 public boolean appliesTo(ResourceFolderType folderType) { 196 return folderType == ResourceFolderType.VALUES; 197 } 198 199 @Override getSpeed()200 public Speed getSpeed() { 201 return Speed.FAST; 202 } 203 204 @Override getApplicableElements()205 public Collection<String> getApplicableElements() { 206 return Arrays.asList( 207 TAG_STRING, 208 TAG_STRING_ARRAY 209 ); 210 } 211 212 @Override beforeCheckProject(Context context)213 public void beforeCheckProject(Context context) { 214 mCheckDashes = context.isEnabled(DASHES); 215 mCheckQuotes = context.isEnabled(QUOTES); 216 mCheckFractions = context.isEnabled(FRACTIONS); 217 mCheckEllipsis = context.isEnabled(ELLIPSIS); 218 mCheckMisc = context.isEnabled(OTHER); 219 } 220 221 @Override visitElement(XmlContext context, Element element)222 public void visitElement(XmlContext context, Element element) { 223 NodeList childNodes = element.getChildNodes(); 224 for (int i = 0, n = childNodes.getLength(); i < n; i++) { 225 Node child = childNodes.item(i); 226 if (child.getNodeType() == Node.TEXT_NODE) { 227 String text = child.getNodeValue(); 228 checkText(context, element, text); 229 } else if (child.getNodeType() == Node.ELEMENT_NODE && 230 child.getParentNode().getNodeName().equals(TAG_STRING_ARRAY)) { 231 // String array item children 232 NodeList items = child.getChildNodes(); 233 for (int j = 0, m = items.getLength(); j < m; j++) { 234 Node item = items.item(j); 235 if (item.getNodeType() == Node.TEXT_NODE) { 236 String text = item.getNodeValue(); 237 checkText(context, child, text); 238 } 239 } 240 } 241 } 242 } 243 checkText(XmlContext context, Node element, String text)244 private void checkText(XmlContext context, Node element, String text) { 245 if (mCheckEllipsis) { 246 // Replace ... with ellipsis character? 247 int ellipsis = text.indexOf("..."); //$NON-NLS-1$ 248 if (ellipsis != -1 && !text.startsWith(".", ellipsis + 3)) { //$NON-NLS-1$ 249 context.report(ELLIPSIS, element, context.getLocation(element), 250 ELLIPSIS_MESSAGE, null); 251 } 252 } 253 254 // Dashes 255 if (mCheckDashes) { 256 int hyphen = text.indexOf('-'); 257 if (hyphen != -1) { 258 // n dash 259 Matcher matcher = HYPHEN_RANGE_PATTERN.matcher(text); 260 if (matcher.matches()) { 261 // Make sure that if there is no space before digit there isn't 262 // one on the left either -- since we don't want to consider 263 // "1 2 -3" as a range from 2 to 3 264 boolean isNegativeNumber = 265 !Character.isWhitespace(matcher.group(2).charAt(0)) && 266 Character.isWhitespace(matcher.group(1).charAt( 267 matcher.group(1).length() - 1)); 268 if (!isNegativeNumber) { 269 context.report(DASHES, element, context.getLocation(element), 270 EN_DASH_MESSAGE, 271 null); 272 } 273 } 274 275 // m dash 276 int emdash = text.indexOf("--"); //$NON-NLS-1$ 277 // Don't suggest replacing -- or "--" with an m dash since these are sometimes 278 // used as digit marker strings 279 if (emdash > 1 && !text.startsWith("-", emdash + 2)) { //$NON-NLS-1$ 280 context.report(DASHES, element, context.getLocation(element), 281 EM_DASH_MESSAGE, null); 282 } 283 } 284 } 285 286 if (mCheckQuotes) { 287 // Check for single quotes that can be replaced with directional quotes 288 int quoteStart = text.indexOf('\''); 289 if (quoteStart != -1) { 290 int quoteEnd = text.indexOf('\'', quoteStart + 1); 291 if (quoteEnd != -1 && quoteEnd > quoteStart + 1 292 && (quoteEnd < text.length() -1 || quoteStart > 0) 293 && SINGLE_QUOTE.matcher(text).matches()) { 294 context.report(QUOTES, element, context.getLocation(element), 295 SINGLE_QUOTE_MESSAGE, null); 296 return; 297 } 298 299 // Check for apostrophes that can be replaced by typographic apostrophes 300 if (quoteEnd == -1 && quoteStart > 0 301 && Character.isLetterOrDigit(text.charAt(quoteStart - 1))) { 302 context.report(QUOTES, element, context.getLocation(element), 303 TYPOGRAPHIC_APOSTROPHE_MESSAGE, null); 304 return; 305 } 306 } 307 308 // Check for double quotes that can be replaced by directional double quotes 309 quoteStart = text.indexOf('"'); 310 if (quoteStart != -1) { 311 int quoteEnd = text.indexOf('"', quoteStart + 1); 312 if (quoteEnd != -1 && quoteEnd > quoteStart + 1) { 313 if (quoteEnd < text.length() -1 || quoteStart > 0) { 314 context.report(QUOTES, element, context.getLocation(element), 315 DBL_QUOTES_MESSAGE, null); 316 return; 317 } 318 } 319 } 320 321 // Check for grave accent quotations 322 if (text.indexOf('`') != -1 && GRAVE_QUOTATION.matcher(text).matches()) { 323 // Are we indenting ``like this'' or `this' ? If so, complain 324 context.report(QUOTES, element, context.getLocation(element), 325 GRAVE_QUOTE_MESSAGE, null); 326 return; 327 } 328 329 // Consider suggesting other types of directional quotes, such as guillemets, in 330 // other languages? 331 // There are a lot of exceptions and special cases to be considered so 332 // this will need careful implementation and testing. 333 // See http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks 334 } 335 336 // Fraction symbols? 337 if (mCheckFractions && text.indexOf('/') != -1) { 338 Matcher matcher = FRACTION_PATTERN.matcher(text); 339 if (matcher.matches()) { 340 String top = matcher.group(1); // Numerator 341 String bottom = matcher.group(2); // Denominator 342 if (top.equals("1") && bottom.equals("2")) { //$NON-NLS-1$ //$NON-NLS-2$ 343 context.report(FRACTIONS, element, context.getLocation(element), 344 String.format(FRACTION_MESSAGE, '\u00BD', "½", "1/2"), null); 345 } else if (top.equals("1") && bottom.equals("4")) { //$NON-NLS-1$ //$NON-NLS-2$ 346 context.report(FRACTIONS, element, context.getLocation(element), 347 String.format(FRACTION_MESSAGE, '\u00BC', "¼", "1/4"), null); 348 } else if (top.equals("3") && bottom.equals("4")) { //$NON-NLS-1$ //$NON-NLS-2$ 349 context.report(FRACTIONS, element, context.getLocation(element), 350 String.format(FRACTION_MESSAGE, '\u00BE', "¾", "3/4"), null); 351 } else if (top.equals("1") && bottom.equals("3")) { //$NON-NLS-1$ //$NON-NLS-2$ 352 context.report(FRACTIONS, element, context.getLocation(element), 353 String.format(FRACTION_MESSAGE, '\u2153', "⅓", "1/3"), null); 354 } else if (top.equals("2") && bottom.equals("3")) { //$NON-NLS-1$ //$NON-NLS-2$ 355 context.report(FRACTIONS, element, context.getLocation(element), 356 String.format(FRACTION_MESSAGE, '\u2154', "⅔", "2/3"), null); 357 } 358 } 359 } 360 361 if (mCheckMisc) { 362 // Fix copyright symbol? 363 if (text.indexOf('(') != -1 364 && (text.contains("(c)") || text.contains("(C)"))) { //$NON-NLS-1$ //$NON-NLS-2$ 365 // Suggest replacing with copyright symbol? 366 context.report(OTHER, element, context.getLocation(element), 367 COPYRIGHT_MESSAGE, null); 368 // Replace (R) and TM as well? There are unicode characters for these but they 369 // are probably not very common within Android app strings. 370 } 371 } 372 } 373 374 /** 375 * An object describing a single edit to be made. The offset points to a 376 * location to start editing; the length is the number of characters to 377 * delete, and the replaceWith string points to a string to insert at the 378 * offset. Note that this can model not just replacement edits but deletions 379 * (empty replaceWith) and insertions (replace length = 0) too. 380 */ 381 public static class ReplaceEdit { 382 /** The offset of the edit */ 383 public final int offset; 384 /** The number of characters to delete at the offset */ 385 public final int length; 386 /** The characters to insert at the offset */ 387 public final String replaceWith; 388 389 /** 390 * Creates a new replace edit 391 * 392 * @param offset the offset of the edit 393 * @param length the number of characters to delete at the offset 394 * @param replaceWith the characters to insert at the offset 395 */ ReplaceEdit(int offset, int length, String replaceWith)396 public ReplaceEdit(int offset, int length, String replaceWith) { 397 super(); 398 this.offset = offset; 399 this.length = length; 400 this.replaceWith = replaceWith; 401 } 402 } 403 404 /** 405 * Returns a list of edits to be applied to fix the suggestion made by the 406 * given warning. The specific issue id and message should be the message 407 * provided by this detector in an earlier run. 408 * <p> 409 * This is intended to help tools implement automatic fixes of these 410 * warnings. The reason only the message and issue id can be provided 411 * instead of actual state passed in the data field to a reporter is that 412 * fix operation can be run much later than the lint is processed (for 413 * example, in a subsequent run of the IDE when only the warnings have been 414 * persisted), 415 * 416 * @param issueId the issue id, which should be the id for one of the 417 * typography issues 418 * @param message the actual error message, which should be a message 419 * provided by this detector 420 * @param textNode a text node which corresponds to the text node the 421 * warning operated on 422 * @return a list of edits, which is never null but could be empty. The 423 * offsets in the edit objects are relative to the text node. 424 */ getEdits(String issueId, String message, Node textNode)425 public static List<ReplaceEdit> getEdits(String issueId, String message, Node textNode) { 426 List<ReplaceEdit> edits = new ArrayList<ReplaceEdit>(); 427 String text = textNode.getNodeValue(); 428 if (message.equals(ELLIPSIS_MESSAGE)) { 429 int offset = text.indexOf("..."); //$NON-NLS-1$ 430 if (offset != -1) { 431 edits.add(new ReplaceEdit(offset, 3, "\u2026")); //$NON-NLS-1$ 432 } 433 } else if (message.equals(EN_DASH_MESSAGE)) { 434 int offset = text.indexOf('-'); 435 if (offset != -1) { 436 edits.add(new ReplaceEdit(offset, 1, "\u2013")); //$NON-NLS-1$ 437 } 438 } else if (message.equals(EM_DASH_MESSAGE)) { 439 int offset = text.indexOf("--"); //$NON-NLS-1$ 440 if (offset != -1) { 441 edits.add(new ReplaceEdit(offset, 2, "\u2014")); //$NON-NLS-1$ 442 } 443 } else if (message.equals(TYPOGRAPHIC_APOSTROPHE_MESSAGE)) { 444 int offset = text.indexOf('\''); 445 if (offset != -1) { 446 edits.add(new ReplaceEdit(offset, 1, "\u2019")); //$NON-NLS-1$ 447 } 448 } else if (message.equals(COPYRIGHT_MESSAGE)) { 449 int offset = text.indexOf("(c)"); //$NON-NLS-1$ 450 if (offset == -1) { 451 offset = text.indexOf("(C)"); //$NON-NLS-1$ 452 } 453 if (offset != -1) { 454 edits.add(new ReplaceEdit(offset, 3, "\u00A9")); //$NON-NLS-1$ 455 } 456 } else if (message.equals(SINGLE_QUOTE_MESSAGE)) { 457 int offset = text.indexOf('\''); 458 if (offset != -1) { 459 int endOffset = text.indexOf("'", offset + 1); //$NON-NLS-1$ 460 if (endOffset != -1) { 461 edits.add(new ReplaceEdit(offset, 1, "\u2018")); //$NON-NLS-1$ 462 edits.add(new ReplaceEdit(endOffset, 1, "\u2019")); //$NON-NLS-1$ 463 } 464 } 465 } else if (message.equals(DBL_QUOTES_MESSAGE)) { 466 int offset = text.indexOf('"'); 467 if (offset != -1) { 468 int endOffset = text.indexOf('"', offset + 1); 469 if (endOffset != -1) { 470 edits.add(new ReplaceEdit(offset, 1, "\u201C")); //$NON-NLS-1$ 471 edits.add(new ReplaceEdit(endOffset, 1, "\u201D")); //$NON-NLS-1$ 472 } 473 } 474 } else if (message.equals(GRAVE_QUOTE_MESSAGE)) { 475 int offset = text.indexOf('`'); 476 if (offset != -1) { 477 int endOffset = text.indexOf('\'', offset + 1); 478 if (endOffset != -1) { 479 edits.add(new ReplaceEdit(offset, 1, "\u2018")); //$NON-NLS-1$ 480 edits.add(new ReplaceEdit(endOffset, 1, "\u2019")); //$NON-NLS-1$ 481 } 482 } 483 } else { 484 Matcher matcher = Pattern.compile(FRACTION_MESSAGE_PATTERN).matcher(message); 485 if (matcher.find()) { 486 // "Use fraction character %1$c (%2$s) instead of %3$s ?"; 487 String replace = matcher.group(3); 488 int offset = text.indexOf(replace); 489 if (offset != -1) { 490 String replaceWith = matcher.group(2); 491 edits.add(new ReplaceEdit(offset, replace.length(), replaceWith)); 492 } 493 } 494 } 495 496 return edits; 497 } 498 } 499