/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package com.android.tools.lint.checks;
18 
19 import static com.android.tools.lint.detector.api.LintConstants.TAG_STRING;
20 import static com.android.tools.lint.detector.api.LintConstants.TAG_STRING_ARRAY;
21 
22 import com.android.annotations.VisibleForTesting;
23 import com.android.resources.ResourceFolderType;
24 import com.android.tools.lint.detector.api.Category;
25 import com.android.tools.lint.detector.api.Context;
26 import com.android.tools.lint.detector.api.Issue;
27 import com.android.tools.lint.detector.api.ResourceXmlDetector;
28 import com.android.tools.lint.detector.api.Scope;
29 import com.android.tools.lint.detector.api.Severity;
30 import com.android.tools.lint.detector.api.Speed;
31 import com.android.tools.lint.detector.api.XmlContext;
32 
33 import org.w3c.dom.Element;
34 import org.w3c.dom.Node;
35 import org.w3c.dom.NodeList;
36 
37 import java.util.ArrayList;
38 import java.util.Arrays;
39 import java.util.Collection;
40 import java.util.List;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43 
44 /**
45  * Checks for various typographical issues in string definitions.
46  */
47 public class TypographyDetector extends ResourceXmlDetector {
48     /** Replace hyphens with dashes? */
49     public static final Issue DASHES = Issue.create(
50             "TypographyDashes", //$NON-NLS-1$
51             "Looks for usages of hyphens which can be replaced by n dash and m dash characters",
52             "The \"n dash\" (\u2013, –) and the \"m dash\" (\u2014, —) " +
53             "characters are used for ranges (n dash) and breaks (m dash). Using these " +
54             "instead of plain hyphens can make text easier to read and your application " +
55             "will look more polished.",
56             Category.TYPOGRAPHY,
57             5,
58             Severity.WARNING,
59             TypographyDetector.class,
60             Scope.RESOURCE_FILE_SCOPE).
61             setMoreInfo("http://en.wikipedia.org/wiki/Dash"); //$NON-NLS-1$
62 
63     /** Replace dumb quotes with smart quotes? */
64     public static final Issue QUOTES = Issue.create(
65             "TypographyQuotes", //$NON-NLS-1$
66             "Looks for straight quotes which can be replaced by curvy quotes",
67             "Straight single quotes and double quotes, when used as a pair, can be replaced " +
68             "by \"curvy quotes\" (or directional quotes). This can make the text more " +
69             "readable.\n" +
70             "\n" +
71             "Note that you should never use grave accents and apostrophes to quote, " +
72             "`like this'.\n" +
73             "\n" +
74             "(Also note that you should not use curvy quotes for code fragments.)",
75             Category.TYPOGRAPHY,
76             5,
77             Severity.WARNING,
78             TypographyDetector.class,
79             Scope.RESOURCE_FILE_SCOPE).
80             setMoreInfo("http://en.wikipedia.org/wiki/Quotation_mark"). //$NON-NLS-1$
81             // This feature is apparently controversial: recent apps have started using
82             // straight quotes to avoid inconsistencies. Disabled by default for now.
83             setEnabledByDefault(false);
84 
85     /** Replace fraction strings with fraction characters? */
86     public static final Issue FRACTIONS = Issue.create(
87             "TypographyFractions", //$NON-NLS-1$
88             "Looks for fraction strings which can be replaced with a fraction character",
89             "You can replace certain strings, such as 1/2, and 1/4, with dedicated " +
90             "characters for these, such as \u00BD (½) and \00BC (¼). " +
91             "This can help make the text more readable.",
92             Category.TYPOGRAPHY,
93             5,
94             Severity.WARNING,
95             TypographyDetector.class,
96             Scope.RESOURCE_FILE_SCOPE).
97             setMoreInfo("http://en.wikipedia.org/wiki/Number_Forms"); //$NON-NLS-1$
98 
99     /** Replace ... with the ellipsis character? */
100     public static final Issue ELLIPSIS = Issue.create(
101             "TypographyEllipsis", //$NON-NLS-1$
102             "Looks for ellipsis strings (...) which can be replaced with an ellipsis character",
103             "You can replace the string \"...\" with a dedicated ellipsis character, " +
104             "ellipsis character (\u2026, …). This can help make the text more readable.",
105             Category.TYPOGRAPHY,
106             5,
107             Severity.WARNING,
108             TypographyDetector.class,
109             Scope.RESOURCE_FILE_SCOPE).
110             setMoreInfo("http://en.wikipedia.org/wiki/Ellipsis"); //$NON-NLS-1$
111 
112     /** The main issue discovered by this detector */
113     public static final Issue OTHER = Issue.create(
114             "TypographyOther", //$NON-NLS-1$
115             "Looks for miscellaneous typographical problems like replacing (c) with \u00A9",
116             "This check looks for miscellaneous typographical problems and offers replacement " +
117             "sequences that will make the text easier to read and your application more " +
118             "polished.",
119             Category.TYPOGRAPHY,
120             3,
121             Severity.WARNING,
122             TypographyDetector.class,
123             Scope.RESOURCE_FILE_SCOPE);
124 
125     private static final String GRAVE_QUOTE_MESSAGE =
126         "Avoid quoting with grave accents; use apostrophes or better yet directional quotes instead";
127     private static final String ELLIPSIS_MESSAGE =
128         "Replace \"...\" with ellipsis character (\u2026, …) ?";
129     private static final String EN_DASH_MESSAGE =
130         "Replace \"-\" with an \"en dash\" character (\u2013, –) ?";
131     private static final String EM_DASH_MESSAGE =
132         "Replace \"--\" with an \"em dash\" character (\u2014, —) ?";
133     private static final String TYPOGRAPHIC_APOSTROPHE_MESSAGE =
134         "Replace apostrophe (') with typographic apostrophe (\u2019, ’) ?";
135     private static final String SINGLE_QUOTE_MESSAGE =
136         "Replace straight quotes ('') with directional quotes (\u2018\u2019, ‘ and ’) ?";
137     private static final String DBL_QUOTES_MESSAGE =
138         "Replace straight quotes (\") with directional quotes (\u201C\u201D, “ and ”) ?";
139     private static final String COPYRIGHT_MESSAGE =
140         "Replace (c) with copyright symbol \u00A9 (©) ?";
141 
142     /**
143      * Pattern used to detect scenarios which can be replaced with n dashes: a
144      * numeric range with a hyphen in the middle (and possibly spaces)
145      */
146     @VisibleForTesting
147     static final Pattern HYPHEN_RANGE_PATTERN =
148             Pattern.compile(".*(\\d+\\s*)-(\\s*\\d+).*"); //$NON-NLS-1$
149 
150     /**
151      * Pattern used to detect scenarios where a grave accent mark is used
152      * to do ASCII quotations of the form `this'' or ``this'', which is frowned upon.
153      * This pattern tries to avoid falsely complaining about strings like
154      * "Type Option-` then 'Escape'."
155      */
156     @VisibleForTesting
157     static final Pattern GRAVE_QUOTATION =
158             Pattern.compile("(^[^`]*`[^'`]+'[^']*$)|(^[^`]*``[^'`]+''[^']*$)"); //$NON-NLS-1$
159 
160     /**
161      * Pattern used to detect common fractions, e.g. 1/2, 1/3, 2/3, 1/4, 3/4 and
162      * variations like 2 / 3, but not 11/22 and so on.
163      */
164     @VisibleForTesting
165     static final Pattern FRACTION_PATTERN =
166             Pattern.compile(".*\\b([13])\\s*/\\s*([234])\\b.*"); //$NON-NLS-1$
167 
168     /**
169      * Pattern used to detect single quote strings, such as 'hello', but
170      * not just quoted strings like 'Double quote: "', and not sentences
171      * where there are multiple apostrophes but not in a quoting context such
172      * as "Mind Your P's and Q's".
173      */
174     @VisibleForTesting
175     static final Pattern SINGLE_QUOTE =
176             Pattern.compile(".*\\W*'[^']+'(\\W.*)?"); //$NON-NLS-1$
177 
178     private static final String FRACTION_MESSAGE =
179             "Use fraction character %1$c (%2$s) instead of %3$s ?";
180 
181     private static final String FRACTION_MESSAGE_PATTERN =
182             "Use fraction character (.+) \\((.+)\\) instead of (.+) \\?";
183 
184     private boolean mCheckDashes;
185     private boolean mCheckQuotes;
186     private boolean mCheckFractions;
187     private boolean mCheckEllipsis;
188     private boolean mCheckMisc;
189 
190     /** Constructs a new {@link TypographyDetector} */
TypographyDetector()191     public TypographyDetector() {
192     }
193 
194     @Override
appliesTo(ResourceFolderType folderType)195     public boolean appliesTo(ResourceFolderType folderType) {
196         return folderType == ResourceFolderType.VALUES;
197     }
198 
199     @Override
getSpeed()200     public Speed getSpeed() {
201         return Speed.FAST;
202     }
203 
204     @Override
getApplicableElements()205     public Collection<String> getApplicableElements() {
206         return Arrays.asList(
207                 TAG_STRING,
208                 TAG_STRING_ARRAY
209         );
210     }
211 
212     @Override
beforeCheckProject(Context context)213     public void beforeCheckProject(Context context) {
214         mCheckDashes = context.isEnabled(DASHES);
215         mCheckQuotes = context.isEnabled(QUOTES);
216         mCheckFractions = context.isEnabled(FRACTIONS);
217         mCheckEllipsis = context.isEnabled(ELLIPSIS);
218         mCheckMisc = context.isEnabled(OTHER);
219     }
220 
221     @Override
visitElement(XmlContext context, Element element)222     public void visitElement(XmlContext context, Element element) {
223         NodeList childNodes = element.getChildNodes();
224         for (int i = 0, n = childNodes.getLength(); i < n; i++) {
225             Node child = childNodes.item(i);
226             if (child.getNodeType() == Node.TEXT_NODE) {
227                 String text = child.getNodeValue();
228                 checkText(context, element, text);
229             } else if (child.getNodeType() == Node.ELEMENT_NODE &&
230                     child.getParentNode().getNodeName().equals(TAG_STRING_ARRAY)) {
231                 // String array item children
232                 NodeList items = child.getChildNodes();
233                 for (int j = 0, m = items.getLength(); j < m; j++) {
234                     Node item = items.item(j);
235                     if (item.getNodeType() == Node.TEXT_NODE) {
236                         String text = item.getNodeValue();
237                         checkText(context, child, text);
238                     }
239                 }
240             }
241         }
242     }
243 
checkText(XmlContext context, Node element, String text)244     private void checkText(XmlContext context, Node element, String text) {
245         if (mCheckEllipsis) {
246             // Replace ... with ellipsis character?
247             int ellipsis = text.indexOf("..."); //$NON-NLS-1$
248             if (ellipsis != -1 && !text.startsWith(".", ellipsis + 3)) { //$NON-NLS-1$
249                 context.report(ELLIPSIS, element, context.getLocation(element),
250                         ELLIPSIS_MESSAGE, null);
251             }
252         }
253 
254         // Dashes
255         if (mCheckDashes) {
256             int hyphen = text.indexOf('-');
257             if (hyphen != -1) {
258                 // n dash
259                 Matcher matcher = HYPHEN_RANGE_PATTERN.matcher(text);
260                 if (matcher.matches()) {
261                     // Make sure that if there is no space before digit there isn't
262                     // one on the left either -- since we don't want to consider
263                     // "1 2 -3" as a range from 2 to 3
264                     boolean isNegativeNumber =
265                         !Character.isWhitespace(matcher.group(2).charAt(0)) &&
266                             Character.isWhitespace(matcher.group(1).charAt(
267                                     matcher.group(1).length() - 1));
268                     if (!isNegativeNumber) {
269                         context.report(DASHES, element, context.getLocation(element),
270                             EN_DASH_MESSAGE,
271                             null);
272                     }
273                 }
274 
275                 // m dash
276                 int emdash = text.indexOf("--"); //$NON-NLS-1$
277                 // Don't suggest replacing -- or "--" with an m dash since these are sometimes
278                 // used as digit marker strings
279                 if (emdash > 1 && !text.startsWith("-", emdash + 2)) {   //$NON-NLS-1$
280                     context.report(DASHES, element, context.getLocation(element),
281                             EM_DASH_MESSAGE, null);
282                 }
283             }
284         }
285 
286         if (mCheckQuotes) {
287             // Check for single quotes that can be replaced with directional quotes
288             int quoteStart = text.indexOf('\'');
289             if (quoteStart != -1) {
290                 int quoteEnd = text.indexOf('\'', quoteStart + 1);
291                 if (quoteEnd != -1 && quoteEnd > quoteStart + 1
292                         && (quoteEnd < text.length() -1 || quoteStart > 0)
293                         && SINGLE_QUOTE.matcher(text).matches()) {
294                     context.report(QUOTES, element, context.getLocation(element),
295                         SINGLE_QUOTE_MESSAGE, null);
296                     return;
297                 }
298 
299                 // Check for apostrophes that can be replaced by typographic apostrophes
300                 if (quoteEnd == -1 && quoteStart > 0
301                         && Character.isLetterOrDigit(text.charAt(quoteStart - 1))) {
302                     context.report(QUOTES, element, context.getLocation(element),
303                             TYPOGRAPHIC_APOSTROPHE_MESSAGE, null);
304                     return;
305                 }
306             }
307 
308             // Check for double quotes that can be replaced by directional double quotes
309             quoteStart = text.indexOf('"');
310             if (quoteStart != -1) {
311                 int quoteEnd = text.indexOf('"', quoteStart + 1);
312                 if (quoteEnd != -1 && quoteEnd > quoteStart + 1) {
313                     if (quoteEnd < text.length() -1 || quoteStart > 0) {
314                         context.report(QUOTES, element, context.getLocation(element),
315                             DBL_QUOTES_MESSAGE, null);
316                         return;
317                     }
318                 }
319             }
320 
321             // Check for grave accent quotations
322             if (text.indexOf('`') != -1 && GRAVE_QUOTATION.matcher(text).matches()) {
323                 // Are we indenting ``like this'' or `this' ? If so, complain
324                 context.report(QUOTES, element, context.getLocation(element),
325                         GRAVE_QUOTE_MESSAGE, null);
326                 return;
327             }
328 
329             // Consider suggesting other types of directional quotes, such as guillemets, in
330             // other languages?
331             // There are a lot of exceptions and special cases to be considered so
332             // this will need careful implementation and testing.
333             // See http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks
334         }
335 
336         // Fraction symbols?
337         if (mCheckFractions && text.indexOf('/') != -1) {
338             Matcher matcher = FRACTION_PATTERN.matcher(text);
339             if (matcher.matches()) {
340                 String top = matcher.group(1);    // Numerator
341                 String bottom = matcher.group(2); // Denominator
342                 if (top.equals("1") && bottom.equals("2")) { //$NON-NLS-1$ //$NON-NLS-2$
343                     context.report(FRACTIONS, element, context.getLocation(element),
344                             String.format(FRACTION_MESSAGE, '\u00BD', "&#189;", "1/2"), null);
345                 } else if (top.equals("1") && bottom.equals("4")) { //$NON-NLS-1$ //$NON-NLS-2$
346                     context.report(FRACTIONS, element, context.getLocation(element),
347                             String.format(FRACTION_MESSAGE, '\u00BC', "&#188;", "1/4"), null);
348                 } else if (top.equals("3") && bottom.equals("4")) { //$NON-NLS-1$ //$NON-NLS-2$
349                     context.report(FRACTIONS, element, context.getLocation(element),
350                             String.format(FRACTION_MESSAGE, '\u00BE', "&#190;", "3/4"), null);
351                 } else if (top.equals("1") && bottom.equals("3")) { //$NON-NLS-1$ //$NON-NLS-2$
352                     context.report(FRACTIONS, element, context.getLocation(element),
353                             String.format(FRACTION_MESSAGE, '\u2153', "&#8531;", "1/3"), null);
354                 } else if (top.equals("2") && bottom.equals("3")) { //$NON-NLS-1$ //$NON-NLS-2$
355                     context.report(FRACTIONS, element, context.getLocation(element),
356                             String.format(FRACTION_MESSAGE, '\u2154', "&#8532;", "2/3"), null);
357                 }
358             }
359         }
360 
361         if (mCheckMisc) {
362             // Fix copyright symbol?
363             if (text.indexOf('(') != -1
364                     && (text.contains("(c)") || text.contains("(C)"))) { //$NON-NLS-1$ //$NON-NLS-2$
365                 // Suggest replacing with copyright symbol?
366                 context.report(OTHER, element, context.getLocation(element),
367                     COPYRIGHT_MESSAGE, null);
368                 // Replace (R) and TM as well? There are unicode characters for these but they
369                 // are probably not very common within Android app strings.
370             }
371         }
372     }
373 
374     /**
375      * An object describing a single edit to be made. The offset points to a
376      * location to start editing; the length is the number of characters to
377      * delete, and the replaceWith string points to a string to insert at the
378      * offset. Note that this can model not just replacement edits but deletions
379      * (empty replaceWith) and insertions (replace length = 0) too.
380      */
381     public static class ReplaceEdit {
382         /** The offset of the edit */
383         public final int offset;
384         /** The number of characters to delete at the offset */
385         public final int length;
386         /** The characters to insert at the offset */
387         public final String replaceWith;
388 
389         /**
390          * Creates a new replace edit
391          *
392          * @param offset the offset of the edit
393          * @param length the number of characters to delete at the offset
394          * @param replaceWith the characters to insert at the offset
395          */
ReplaceEdit(int offset, int length, String replaceWith)396         public ReplaceEdit(int offset, int length, String replaceWith) {
397             super();
398             this.offset = offset;
399             this.length = length;
400             this.replaceWith = replaceWith;
401         }
402     }
403 
404     /**
405      * Returns a list of edits to be applied to fix the suggestion made by the
406      * given warning. The specific issue id and message should be the message
407      * provided by this detector in an earlier run.
408      * <p>
409      * This is intended to help tools implement automatic fixes of these
410      * warnings. The reason only the message and issue id can be provided
411      * instead of actual state passed in the data field to a reporter is that
412      * fix operation can be run much later than the lint is processed (for
413      * example, in a subsequent run of the IDE when only the warnings have been
414      * persisted),
415      *
416      * @param issueId the issue id, which should be the id for one of the
417      *            typography issues
418      * @param message the actual error message, which should be a message
419      *            provided by this detector
420      * @param textNode a text node which corresponds to the text node the
421      *            warning operated on
422      * @return a list of edits, which is never null but could be empty. The
423      *         offsets in the edit objects are relative to the text node.
424      */
getEdits(String issueId, String message, Node textNode)425     public static List<ReplaceEdit> getEdits(String issueId, String message, Node textNode) {
426         List<ReplaceEdit> edits = new ArrayList<ReplaceEdit>();
427         String text = textNode.getNodeValue();
428         if (message.equals(ELLIPSIS_MESSAGE)) {
429             int offset = text.indexOf("...");                            //$NON-NLS-1$
430             if (offset != -1) {
431                 edits.add(new ReplaceEdit(offset, 3, "\u2026"));         //$NON-NLS-1$
432             }
433         } else if (message.equals(EN_DASH_MESSAGE)) {
434             int offset = text.indexOf('-');
435             if (offset != -1) {
436                 edits.add(new ReplaceEdit(offset, 1, "\u2013"));         //$NON-NLS-1$
437             }
438         } else if (message.equals(EM_DASH_MESSAGE)) {
439             int offset = text.indexOf("--");                             //$NON-NLS-1$
440             if (offset != -1) {
441                 edits.add(new ReplaceEdit(offset, 2, "\u2014"));         //$NON-NLS-1$
442             }
443         } else if (message.equals(TYPOGRAPHIC_APOSTROPHE_MESSAGE)) {
444             int offset = text.indexOf('\'');
445             if (offset != -1) {
446                 edits.add(new ReplaceEdit(offset, 1, "\u2019"));         //$NON-NLS-1$
447             }
448         } else if (message.equals(COPYRIGHT_MESSAGE)) {
449             int offset = text.indexOf("(c)");                            //$NON-NLS-1$
450             if (offset == -1) {
451                 offset = text.indexOf("(C)");                            //$NON-NLS-1$
452             }
453             if (offset != -1) {
454                 edits.add(new ReplaceEdit(offset, 3, "\u00A9"));         //$NON-NLS-1$
455             }
456         } else if (message.equals(SINGLE_QUOTE_MESSAGE)) {
457             int offset = text.indexOf('\'');
458             if (offset != -1) {
459                 int endOffset = text.indexOf("'", offset + 1);           //$NON-NLS-1$
460                 if (endOffset != -1) {
461                     edits.add(new ReplaceEdit(offset, 1, "\u2018"));     //$NON-NLS-1$
462                     edits.add(new ReplaceEdit(endOffset, 1, "\u2019"));  //$NON-NLS-1$
463                 }
464             }
465         } else if (message.equals(DBL_QUOTES_MESSAGE)) {
466             int offset = text.indexOf('"');
467             if (offset != -1) {
468                 int endOffset = text.indexOf('"', offset + 1);
469                 if (endOffset != -1) {
470                     edits.add(new ReplaceEdit(offset, 1, "\u201C"));     //$NON-NLS-1$
471                     edits.add(new ReplaceEdit(endOffset, 1, "\u201D"));  //$NON-NLS-1$
472                 }
473             }
474         } else if (message.equals(GRAVE_QUOTE_MESSAGE)) {
475             int offset = text.indexOf('`');
476             if (offset != -1) {
477                 int endOffset = text.indexOf('\'', offset + 1);
478                 if (endOffset != -1) {
479                     edits.add(new ReplaceEdit(offset, 1, "\u2018"));     //$NON-NLS-1$
480                     edits.add(new ReplaceEdit(endOffset, 1, "\u2019"));  //$NON-NLS-1$
481                 }
482             }
483         } else {
484             Matcher matcher = Pattern.compile(FRACTION_MESSAGE_PATTERN).matcher(message);
485             if (matcher.find()) {
486                 //  "Use fraction character %1$c (%2$s) instead of %3$s ?";
487                 String replace = matcher.group(3);
488                 int offset = text.indexOf(replace);
489                 if (offset != -1) {
490                     String replaceWith = matcher.group(2);
491                     edits.add(new ReplaceEdit(offset, replace.length(), replaceWith));
492                 }
493             }
494         }
495 
496         return edits;
497     }
498 }
499