#!/usr/bin/env python3
#-*- coding: utf-8 -*-

# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Enforces common Android string best-practices.  It ignores lint messages from
a previous strings file, if provided.

Usage: stringslint.py strings.xml
Usage: stringslint.py strings.xml old_strings.xml

In general:

* Errors signal issues that must be fixed before submitting, and are only
  used when there are no false-positives.

* Warnings signal issues that might need to be fixed, but need manual
  inspection due to risk of false-positives.

* Info signal issues that should be fixed to match best-practices, such
  as providing comments to aid translation.
"""

import codecs
import re
import sys

import lxml.etree as ET

BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)

# Example snippets shown next to lint messages.
COMMENT_EXAMPLE = "<!-- Description of string [CHAR LIMIT=32] -->"
XLIFF_EXAMPLE = '<xliff:g id="domain" example="example.com">%1$s</xliff:g>'


def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
    """Build an ANSI SGR escape sequence for the requested text attributes.

    When ``reset`` is true every other argument is ignored and the sequence
    resets all attributes.  Otherwise the optional foreground/background
    colors (one of the 0-7 color constants above) are combined with exactly
    one intensity code: bold, dim, or normal intensity.
    """
    # manually derived from http://en.wikipedia.org/wiki/ANSI_escape_code#Codes
    codes = []
    if reset:
        codes.append("0")
    else:
        if fg is not None:
            codes.append("3%d" % (fg))
        if bg is not None:
            if not bright:
                codes.append("4%d" % (bg))
            else:
                codes.append("10%d" % (bg))
        if bold:
            codes.append("1")
        elif dim:
            codes.append("2")
        else:
            codes.append("22")
    return "\033[%sm" % (";".join(codes))


# Lint messages collected by the current lint() run, keyed by
# "<string name>:<hash of message>".  Re-created on every lint() call.
warnings = None


def warn(tag, msg, actual, expected, color=YELLOW):
    """Record a lint message for ``tag`` in the global ``warnings`` dict.

    ``tag`` is the offending <string> element; its ``name`` attribute and
    source line appear in the message.  ``actual`` and ``expected`` are
    optional (pass None to omit) and are rendered as "Actual:" / "Example:"
    lines.  A later message with the same string name and ``msg`` replaces
    the earlier one.
    """
    global warnings
    # hash() of a str is only stable within one process, which is enough here:
    # the old and new files are always linted in the same run.
    key = "%s:%d" % (tag.attrib["name"], hash(msg))
    value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
                                       tag.sourceline,
                                       tag.attrib["name"],
                                       format(reset=True),
                                       msg)
    if actual is not None:
        value += "\n\tActual: %s%s%s" % (format(dim=True),
                                         actual,
                                         format(reset=True))
    if expected is not None:
        value += "\n\tExample: %s%s%s" % (format(dim=True),
                                          expected,
                                          format(reset=True))
    warnings[key] = value


def error(tag, msg, actual, expected):
    """Record a lint message rendered in red (must-fix issue)."""
    warn(tag, msg, actual, expected, RED)


def info(tag, msg, actual, expected):
    """Record a lint message rendered in cyan (best-practice hint)."""
    warn(tag, msg, actual, expected, CYAN)


# Escaping logic borrowed from https://stackoverflow.com/a/24519338
# NOTE: this pattern uses re.VERBOSE, so it must stay on separate lines;
# each '#' starts a comment that runs to the end of its line.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U........      # 8-digit hex escapes
    | \\u....          # 4-digit hex escapes
    | \\x..            # 2-digit hex escapes
    | \\[0-7]{1,3}     # Octal escapes
    | \\N\{[^}]+\}     # Unicode characters by name
    | \\[\\'"abfnrtv]  # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(s):
    """Return an approximation of how the resource text ``s`` is displayed.

    Line breaks followed by indentation collapse to one space, backslash
    escapes are decoded, format arguments (``%1$s``, ``%d``, ``^1``) become a
    four-character placeholder, ``<br>`` becomes a newline and other simple
    markup tags are dropped.
    """
    def decode_match(match):
        return codecs.decode(match.group(0), 'unicode-escape')

    s = re.sub(r"\n\s*", " ", s)
    s = ESCAPE_SEQUENCE_RE.sub(decode_match, s)
    s = re.sub(r"%(\d+\$)?[a-z]", "____", s)
    s = re.sub(r"\^\d+", "____", s)
    s = re.sub(r"<br/?>", "\n", s)
    s = re.sub(r"</?[a-z]+>", "", s)
    return s


def sample_iter(tag):
    """Yield the text fragments that make up a sample rendering of ``tag``.

    An xliff ``g`` element carrying an ``example`` attribute contributes that
    example instead of its own text; any other node contributes its decoded
    text.  Children are visited recursively, each followed by its decoded
    tail text.
    """
    if (not isinstance(tag, ET._Comment)
            and re.match("{.*xliff.*}g", tag.tag)
            and "example" in tag.attrib):
        yield tag.attrib["example"]
    elif tag.text:
        yield decode_escapes(tag.text)
    for e in tag:
        yield from sample_iter(e)
        if e.tail:
            yield decode_escapes(e.tail)


def lint(path):
    """Lint the strings resource file at ``path``.

    Resets the global ``warnings`` dict, fills it with one entry per issue
    found in the top-level <string> elements, and returns it.  An empty (or
    whitespace-only) file yields an empty dict.
    """
    global warnings
    warnings = {}

    # Read raw bytes and let the XML parser honor the file's own encoding
    # declaration rather than decoding with the platform default encoding.
    with open(path, "rb") as f:
        raw = f.read()
    if not raw.strip():
        return warnings
    root = ET.fromstring(raw)

    last_comment = None
    for child in root:
        # TODO: handle plurals
        if isinstance(child, ET._Comment):
            last_comment = child
        elif child.tag == "string":
            # We always consume comment
            comment = last_comment
            last_comment = None

            # Prepare string for analysis
            text = "".join(child.itertext())
            sample = "".join(sample_iter(child)).strip().strip("'\"")

            # Validate comment
            if comment is None:
                info(child, "Missing string comment to aid translation",
                     None, None)
                continue
            comment_text = comment.text or ""
            if "do not translate" in comment_text.lower():
                continue
            if ("translatable" in child.attrib
                    and child.attrib["translatable"].lower() == "false"):
                continue

            misspelled_attributes = [
                ("translateable", "translatable"),
            ]
            for misspelling, expected in misspelled_attributes:
                if misspelling in child.attrib:
                    error(child, "Misspelled attribute.",
                          misspelling, expected)

            limit = re.search(r"CHAR[ _-]LIMIT=(\d+|NONE|none)", comment_text)
            if limit is None:
                info(child, "Missing CHAR LIMIT to aid translation",
                     repr(comment), COMMENT_EXAMPLE)
            elif re.match(r"\d+", limit.group(1)):
                limit = int(limit.group(1))
                if len(sample) > limit:
                    warn(child,
                         "Expanded string length is larger than CHAR LIMIT",
                         sample, None)

            # Look for common mistakes/substitutions
            if "'" in text:
                error(child, "Turned quotation mark glyphs are more polished",
                      text,
                      "This doesn\u2019t need to \u2018happen\u2019 today")
            # A string wrapped entirely in double quotes is resource-level
            # quoting, not typography; only flag quotes used otherwise.
            if '"' in text and not (text.startswith('"')
                                    and text.endswith('"')):
                error(child, "Turned quotation mark glyphs are more polished",
                      text, "This needs to \u201chappen\u201d today")
            if "..." in text:
                error(child, "Ellipsis glyph is more polished",
                      text, "Loading\u2026")
            if "wi-fi" in text.lower():
                error(child, "Non-breaking glyph is more polished",
                      text, "Wi\u2011Fi")
            if "wifi" in text.lower():
                error(child, "Using non-standard spelling",
                      text, "Wi\u2011Fi")
            if re.search(r"\d-\d", text):
                warn(child, "Ranges should use en dash glyph",
                     text,
                     "You will find this material in chapters 8\u201312")
            if "--" in text:
                warn(child, "Phrases should use em dash glyph",
                     text,
                     "Upon discovering errors\u2014all 124 of them\u2014they recalled.")
            # Two spaces after a period; a single space is the correct form.
            if ".  " in text:
                warn(child, "Only use single space between sentences",
                     text, "First idea. Second idea.")
            if re.match(r"^[A-Z\s]{5,}$", text):
                warn(child,
                     "Actions should use android:textAllCaps in layout; ignore if acronym",
                     text, "Refresh data")
            if " phone " in text and "product" not in child.attrib:
                warn(child,
                     "Strings mentioning phones should have variants for tablets",
                     text, None)

            # When more than one substitution, require indexes
            if len(re.findall("%[^%]", text)) > 1:
                if len(re.findall(r"%[^\d]", text)) > 0:
                    error(child, "Substitutions must be indexed",
                          text, "Add %1$s to %2$s")

            # Require xliff substitutions
            for gc in child.iter():
                badsub = False
                if gc.tail and re.search("%[^%]", gc.tail):
                    badsub = True
                # Comments and processing instructions have a non-string tag;
                # they are neither xliff elements nor part of the string text.
                if isinstance(gc.tag, str):
                    if re.match("{.*xliff.*}g", gc.tag):
                        if "id" not in gc.attrib:
                            error(child,
                                  "Substitutions must define id attribute",
                                  None, XLIFF_EXAMPLE)
                        if "example" not in gc.attrib:
                            error(child,
                                  "Substitutions must define example attribute",
                                  None, XLIFF_EXAMPLE)
                    elif gc.text and re.search("%[^%]", gc.text):
                        badsub = True
                if badsub:
                    error(child, "Substitutions must be inside xliff tags",
                          text, XLIFF_EXAMPLE)

    return warnings


if len(sys.argv) < 2:
    # Exits with status 1 and the message on stderr instead of an IndexError.
    sys.exit("Usage: stringslint.py strings.xml [old_strings.xml]")

if len(sys.argv) > 2:
    before = lint(sys.argv[2])
else:
    before = {}
after = lint(sys.argv[1])

# Messages already present in the previous version of the file are ignored.
for b in before:
    if b in after:
        del after[b]

if len(after) > 0:
    for a in sorted(after.keys()):
        print(after[a])
        print()
    sys.exit(1)