#!/usr/bin/env python3
#-*- coding: utf-8 -*-

# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Enforces common Android string best-practices.  It ignores lint messages from
a previous strings file, if provided.

Usage: stringslint.py strings.xml
Usage: stringslint.py strings.xml old_strings.xml

In general:
* Errors signal issues that must be fixed before submitting, and are only
  used when there are no false-positives.
* Warnings signal issues that might need to be fixed, but need manual
  inspection due to risk of false-positives.
* Info signal issues that should be fixed to match best-practices, such
  as providing comments to aid translation.
"""

import codecs
import re
import sys

import lxml.etree as ET

BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)


def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
    """Build an ANSI SGR escape sequence.

    With reset=True the sequence is just the reset code.  Otherwise it
    combines the optional foreground colour, the optional background colour
    (bright variant when bright=True) and exactly one intensity code:
    bold, dim, or normal intensity.
    """
    # manually derived from http://en.wikipedia.org/wiki/ANSI_escape_code#Codes
    codes = []
    if reset:
        codes.append("0")
    else:
        if fg is not None:
            codes.append("3%d" % (fg))
        if bg is not None:
            codes.append(("10%d" if bright else "4%d") % (bg))
        if bold:
            codes.append("1")
        elif dim:
            codes.append("2")
        else:
            codes.append("22")
    return "\033[%sm" % (";".join(codes))


# Messages collected by the lint() call currently in progress; lint() rebinds
# this to a fresh dict on every run.
warnings = None


def warn(tag, msg, actual, expected, color=YELLOW):
    """Record a lint message about the <string> element `tag`.

    The message is stored in the module-level `warnings` dict.  `actual` and
    `expected` are optional (None to omit) and are shown dimmed under the
    headline.
    """
    global warnings
    # Keyed on string name + message text so that the same finding in an old
    # and a new file collides, and so that sorted output is stable between
    # runs (hash() of a str is randomised per process).
    key = "%s:%s" % (tag.attrib["name"], msg)
    value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
                                       tag.sourceline,
                                       tag.attrib["name"],
                                       format(reset=True),
                                       msg)
    if actual is not None:
        value += "\n\tActual: %s%s%s" % (format(dim=True),
                                         actual,
                                         format(reset=True))
    if expected is not None:
        value += "\n\tExample: %s%s%s" % (format(dim=True),
                                          expected,
                                          format(reset=True))
    warnings[key] = value


def error(tag, msg, actual, expected):
    """Record a must-fix message (shown in red)."""
    warn(tag, msg, actual, expected, RED)


def info(tag, msg, actual, expected):
    """Record a best-practice message (shown in cyan)."""
    warn(tag, msg, actual, expected, CYAN)


# Escaping logic borrowed from https://stackoverflow.com/a/24519338
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U........      # 8-digit hex escapes
    | \\u....          # 4-digit hex escapes
    | \\x..            # 2-digit hex escapes
    | \\[0-7]{1,3}     # Octal escapes
    | \\N\{[^}]+\}     # Unicode characters by name
    | \\[\\'"abfnrtv]  # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)

# Namespaced tag of an <xliff:g> element as lxml reports it.
_XLIFF_G_RE = re.compile("{.*xliff.*}g")
# A '%' that is not immediately followed by another '%'.
_SUBSTITUTION_RE = re.compile("%[^%]")
_CHAR_LIMIT_RE = re.compile(r"CHAR[ _-]LIMIT=(\d+|NONE|none)")
_XLIFF_EXAMPLE = '<xliff:g id="domain" example="example.com">%1$s</xliff:g>'


def decode_escapes(s):
    """Approximate how raw resource text `s` looks once rendered.

    Wrapped source lines are joined with a space, backslash escapes are
    decoded, format placeholders become "____", <br> becomes a newline and
    other simple lowercase tags are dropped.
    """
    def decode_match(match):
        return codecs.decode(match.group(0), 'unicode-escape')

    s = re.sub(r"\n\s*", " ", s)
    s = ESCAPE_SEQUENCE_RE.sub(decode_match, s)
    s = re.sub(r"%(\d+\$)?[a-z]", "____", s)
    s = re.sub(r"\^\d+", "____", s)
    s = re.sub(r"<br/?>", "\n", s)
    s = re.sub(r"</?[a-z]+>", "", s)
    return s


def sample_iter(tag):
    """Yield the pieces of a sample rendering of `tag` and its descendants.

    An <xliff:g> element contributes its `example` attribute when it has one;
    any other element contributes its decoded text.  Comments and processing
    instructions contribute nothing themselves (their tail text is still
    yielded by the parent).
    """
    if not isinstance(tag.tag, str):
        # Comment / PI node: .tag is a factory function, and .text is the
        # comment body, which is not part of the displayed string.
        return
    if _XLIFF_G_RE.match(tag.tag) and "example" in tag.attrib:
        yield tag.attrib["example"]
    elif tag.text:
        yield decode_escapes(tag.text)
    for e in tag:
        for v in sample_iter(e):
            yield v
        if e.tail:
            yield decode_escapes(e.tail)


def _check_glyphs(child, text):
    """Flag common typographic mistakes/substitutions in `text`."""
    if "'" in text:
        error(child, "Turned quotation mark glyphs are more polished",
              text, "This doesn\u2019t need to \u2018happen\u2019 today")
    # A string wrapped entirely in double quotes is exempt; any other use of
    # a straight double quote is flagged.
    if '"' in text and not (text.startswith('"') and text.endswith('"')):
        error(child, "Turned quotation mark glyphs are more polished",
              text, "This needs to \u201chappen\u201d today")
    if "..." in text:
        error(child, "Ellipsis glyph is more polished",
              text, "Loading\u2026")
    lowered = text.lower()
    if "wi-fi" in lowered:
        error(child, "Non-breaking glyph is more polished",
              text, "Wi\u2011Fi")
    if "wifi" in lowered:
        error(child, "Using non-standard spelling",
              text, "Wi\u2011Fi")
    if re.search(r"\d-\d", text):
        warn(child, "Ranges should use en dash glyph",
             text, "You will find this material in chapters 8\u201312")
    if "--" in text:
        warn(child, "Phrases should use em dash glyph",
             text, "Upon discovering errors\u2014all 124 of them\u2014they recalled.")
    # Two spaces after a period; a single space is the accepted form.
    if ".  " in text:
        warn(child, "Only use single space between sentences",
             text, "First idea. Second idea.")
    if re.match(r"^[A-Z\s]{5,}$", text):
        warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
             text, "Refresh data")
    if " phone " in text and "product" not in child.attrib:
        warn(child, "Strings mentioning phones should have variants for tablets",
             text, None)


def _check_substitutions(child, text):
    """Require indexed substitutions that are wrapped in <xliff:g> tags."""
    # When more than one substitution, require indexes
    if len(_SUBSTITUTION_RE.findall(text)) > 1:
        if re.findall(r"%[^\d]", text):
            error(child, "Substitutions must be indexed",
                  text, "Add %1$s to %2$s")

    # Require xliff substitutions
    for gc in child.iter():
        badsub = bool(gc.tail and _SUBSTITUTION_RE.search(gc.tail))
        # iter() also yields comments/PIs, whose .tag is not a string and
        # whose .text is not part of the string value.
        if isinstance(gc.tag, str):
            if _XLIFF_G_RE.match(gc.tag):
                if "id" not in gc.attrib:
                    error(child, "Substitutions must define id attribute",
                          None, _XLIFF_EXAMPLE)
                if "example" not in gc.attrib:
                    error(child, "Substitutions must define example attribute",
                          None, _XLIFF_EXAMPLE)
            elif gc.text and _SUBSTITUTION_RE.search(gc.text):
                badsub = True
        if badsub:
            error(child, "Substitutions must be inside xliff tags",
                  text, _XLIFF_EXAMPLE)


def _lint_string(child, comment):
    """Run every check on one <string> element and its preceding comment."""
    # Prepare string for analysis
    text = "".join(child.itertext())
    sample = "".join(sample_iter(child)).strip().strip("'\"")

    # Validate comment
    if comment is None:
        info(child, "Missing string comment to aid translation",
             None, None)
        return
    if "do not translate" in comment.text.lower():
        return
    if child.attrib.get("translatable", "").lower() == "false":
        return

    misspelled_attributes = [
        ("translateable", "translatable"),
    ]
    for misspelling, expected in misspelled_attributes:
        if misspelling in child.attrib:
            error(child, "Misspelled <string> attribute.", misspelling, expected)

    limit = _CHAR_LIMIT_RE.search(comment.text)
    if limit is None:
        info(child, "Missing CHAR LIMIT to aid translation",
             repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
    elif re.match(r"\d+", limit.group(1)):
        if len(sample) > int(limit.group(1)):
            warn(child, "Expanded string length is larger than CHAR LIMIT",
                 sample, None)

    _check_glyphs(child, text)
    _check_substitutions(child, text)


def lint(path):
    """Lint the strings resource file at `path`.

    Returns a dict mapping an opaque per-finding key to the formatted,
    colourised message.  The same dict is left in the module-level
    `warnings`.  An empty (or whitespace-only) file yields an empty dict.
    """
    global warnings
    warnings = {}

    # Read bytes so the XML parser, not the locale, decides the encoding.
    with open(path, "rb") as f:
        raw = f.read()
    if not raw.strip():
        return warnings
    root = ET.fromstring(raw)

    last_comment = None
    for child in root:
        # TODO: handle plurals
        if isinstance(child, ET._Comment):
            last_comment = child
        elif child.tag == "string":
            # We always consume comment
            comment = last_comment
            last_comment = None
            _lint_string(child, comment)

    return warnings


def main(argv=None):
    """Command-line entry point; exits with status 1 if new findings exist."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("Usage: stringslint.py strings.xml [old_strings.xml]")

    before = lint(argv[2]) if len(argv) > 2 else {}
    after = lint(argv[1])

    # Findings already present in the old file are not reported again.
    for key in before:
        after.pop(key, None)

    if after:
        for key in sorted(after):
            print(after[key])
            print()
        sys.exit(1)


if __name__ == "__main__":
    main()