#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''Utilities used by GRIT.
'''

import codecs
import htmlentitydefs
import os
import re
import shutil
import sys
import tempfile
import time
import types
from xml.sax import saxutils

from grit import lazy_re

_root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))


# Unique constants for use by ReadFile().
BINARY, RAW_TEXT = range(2)


# Unique constants representing data pack encodings.
_, UTF8, UTF16 = range(3)


def Encode(message, encoding):
  '''Returns a byte stream that represents |message| in the given |encoding|.

  Args:
    message: the text to encode.
    encoding: UTF8 or UTF16; any other value leaves |message| untouched.

  Return:
    The encoded bytes for UTF8/UTF16, otherwise |message| itself.
  '''
  # |message| is a python unicode string, so convert to a byte stream that
  # has the correct encoding requested for the datapacks.
  if encoding == UTF8:
    return message.encode('utf8')
  if encoding == UTF16:
    # We skip the first 2 bytes of text resources because it is the BOM.
    return message.encode('utf16')[2:]
  # Default is BINARY
  return message


# Matches all different types of linebreaks.
LINEBREAKS = re.compile('\r\n|\n|\r')


def MakeRelativePath(base_path, path_to_make_relative):
  """Returns a relative path from the base_path to the path_to_make_relative.

  In other words, os.path.join(base_path,
    MakeRelativePath(base_path, path_to_make_relative))
  is the same location as path_to_make_relative.

  Args:
    base_path: the root path
    path_to_make_relative: an absolute path that is on the same drive
      as base_path

  Return:
    The relative path, or path_to_make_relative unchanged when the two
    paths share no common prefix at all.
  """

  def _GetPathAfterPrefix(prefix_path, path_with_prefix):
    """Gets the subpath within prefix_path for the path_with_prefix
    with no beginning or trailing path separators.

    Args:
      prefix_path: the base path
      path_with_prefix: a path that starts with prefix_path
    """
    assert path_with_prefix.startswith(prefix_path)
    path_without_prefix = path_with_prefix[len(prefix_path):]
    normalized_path = os.path.normpath(path_without_prefix.strip(os.path.sep))
    # os.path.normpath('') yields '.', which must not count as a path piece.
    if normalized_path == '.':
      normalized_path = ''
    return normalized_path

  def _GetCommonBaseDirectory(*args):
    """Returns the common prefix directory for the given paths

    Args:
      The list of paths (at least one of which should be a directory)
    """
    prefix = os.path.commonprefix(args)
    # prefix is a character-by-character prefix (i.e. it does not end
    # on a directory bound, so this code fixes that)

    # if the prefix ends with the separator, then it is perfect.
    if len(prefix) > 0 and prefix[-1] == os.path.sep:
      return prefix

    # We need to loop through all paths or else we can get
    # tripped up by "c:\a" and "c:\abc".  The common prefix
    # is "c:\a" which is a directory and looks good with
    # respect to the first directory but it is clear that
    # isn't a common directory when the second path is
    # examined.
    for path in args:
      assert len(path) >= len(prefix)
      # If the prefix is the same length as the path,
      # then the prefix must be a directory (since one
      # of the arguments should be a directory).
      if path == prefix:
        continue
      # if the character after the prefix in the path
      # is the separator, then the prefix appears to be a
      # valid directory as well for the given path
      if path[len(prefix)] == os.path.sep:
        continue
      # Otherwise, the prefix is not a directory, so it needs
      # to be shortened to be one
      index_sep = prefix.rfind(os.path.sep)
      # Use "index_sep + 1" because it includes the final sep
      # and it handles the case when the index_sep is -1 as well
      prefix = prefix[:index_sep + 1]
      # At this point we backed up to a directory bound which is
      # common to all paths, so we can quit going through all of
      # the paths.
      break
    return prefix

  prefix = _GetCommonBaseDirectory(base_path, path_to_make_relative)
  # If the paths had no commonality at all, then return the absolute path
  # because it is the best that can be done.  If the path had to be relative
  # then eventually this absolute path will be discovered (when a build breaks)
  # and an appropriate fix can be made, but having this allows for the best
  # backward compatibility with the absolute path behavior in the past.
  if not prefix:
    return path_to_make_relative
  # Build a path from the base dir to the common prefix
  remaining_base_path = _GetPathAfterPrefix(prefix, base_path)

  # The following handles two cases: "" and "foo\\bar"
  path_pieces = remaining_base_path.split(os.path.sep)
  base_depth_from_prefix = len([d for d in path_pieces if len(d)])
  base_to_prefix = (".." + os.path.sep) * base_depth_from_prefix

  # Add in the path from the prefix to the path_to_make_relative
  remaining_other_path = _GetPathAfterPrefix(prefix, path_to_make_relative)
  return base_to_prefix + remaining_other_path


KNOWN_SYSTEM_IDENTIFIERS = set()

SYSTEM_IDENTIFIERS = None


def SetupSystemIdentifiers(ids):
  '''Adds ids to a regexp of known system identifiers.

  Can be called many times, ids will be accumulated.  Every call rebuilds
  the module-level SYSTEM_IDENTIFIERS pattern from all ids seen so far.

  Args:
    ids: an iterable of strings
  '''
  global SYSTEM_IDENTIFIERS
  KNOWN_SYSTEM_IDENTIFIERS.update(ids)
  # The ids are escaped so that each one is matched literally: the pattern is
  # compiled with re.VERBOSE, where unescaped whitespace and '#' would be
  # dropped.  Sorting keeps the generated pattern deterministic.
  alternatives = [r'\b%s\b' % re.escape(i)
                  for i in sorted(KNOWN_SYSTEM_IDENTIFIERS)]
  SYSTEM_IDENTIFIERS = lazy_re.compile(' | '.join(alternatives), re.VERBOSE)


# Matches all of the resource IDs predefined by Windows.
SetupSystemIdentifiers((
    'IDOK', 'IDCANCEL', 'IDC_STATIC', 'IDYES', 'IDNO',
    'ID_FILE_NEW', 'ID_FILE_OPEN', 'ID_FILE_CLOSE', 'ID_FILE_SAVE',
    'ID_FILE_SAVE_AS', 'ID_FILE_PAGE_SETUP', 'ID_FILE_PRINT_SETUP',
    'ID_FILE_PRINT', 'ID_FILE_PRINT_DIRECT', 'ID_FILE_PRINT_PREVIEW',
    'ID_FILE_UPDATE', 'ID_FILE_SAVE_COPY_AS', 'ID_FILE_SEND_MAIL',
    'ID_FILE_MRU_FIRST', 'ID_FILE_MRU_LAST',
    'ID_EDIT_CLEAR', 'ID_EDIT_CLEAR_ALL', 'ID_EDIT_COPY',
    'ID_EDIT_CUT', 'ID_EDIT_FIND', 'ID_EDIT_PASTE', 'ID_EDIT_PASTE_LINK',
    'ID_EDIT_PASTE_SPECIAL', 'ID_EDIT_REPEAT', 'ID_EDIT_REPLACE',
    'ID_EDIT_SELECT_ALL', 'ID_EDIT_UNDO', 'ID_EDIT_REDO',
    'VS_VERSION_INFO', 'IDRETRY',
    'ID_APP_ABOUT', 'ID_APP_EXIT',
    'ID_NEXT_PANE', 'ID_PREV_PANE',
    'ID_WINDOW_NEW', 'ID_WINDOW_ARRANGE', 'ID_WINDOW_CASCADE',
    'ID_WINDOW_TILE_HORZ', 'ID_WINDOW_TILE_VERT', 'ID_WINDOW_SPLIT',
    'ATL_IDS_SCSIZE', 'ATL_IDS_SCMOVE', 'ATL_IDS_SCMINIMIZE',
    'ATL_IDS_SCMAXIMIZE', 'ATL_IDS_SCNEXTWINDOW', 'ATL_IDS_SCPREVWINDOW',
    'ATL_IDS_SCCLOSE', 'ATL_IDS_SCRESTORE', 'ATL_IDS_SCTASKLIST',
    'ATL_IDS_MDICHILD', 'ATL_IDS_IDLEMESSAGE', 'ATL_IDS_MRU_FILE' ))


# Matches character entities, whether specified by name, decimal or hex.
_HTML_ENTITY = lazy_re.compile(
    '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));',
    re.IGNORECASE)

# Matches characters that should be HTML-escaped.  This is ", <, > and &, but
# only if the & is not the start of an HTML character entity.
# NOTE(review): the trailing ';' binds only to the last alternative of the
# lookahead, so an '&' followed by '#123' or '#x1f' without a terminating ';'
# is also left unescaped.  Kept as-is to preserve existing output.
_HTML_CHARS_TO_ESCAPE = lazy_re.compile(
    '"|<|>|&(?!#[0-9]+|#x[0-9a-z]+|[a-z]+;)',
    re.IGNORECASE | re.MULTILINE)


def ReadFile(filename, encoding):
  '''Reads and returns the entire contents of the given file.

  Args:
    filename: The path to the file.
    encoding: A Python codec name or one of two special values: BINARY to read
              the file in binary mode, or RAW_TEXT to read it with newline
              conversion but without decoding to Unicode.

  Return:
    The file contents, decoded with |encoding| unless it is BINARY or RAW_TEXT.
  '''
  mode = 'rb' if encoding == BINARY else 'rU'
  with open(filename, mode) as f:
    data = f.read()
  if encoding not in (BINARY, RAW_TEXT):
    data = data.decode(encoding)
  return data


def WrapOutputStream(stream, encoding='utf-8'):
  '''Returns a stream that wraps the provided stream, making it write
  characters using the specified encoding.'''
  return codecs.getwriter(encoding)(stream)


def ChangeStdoutEncoding(encoding='utf-8'):
  '''Changes STDOUT to print characters using the specified encoding.'''
  sys.stdout = WrapOutputStream(sys.stdout, encoding)


def EscapeHtml(text, escape_quotes=False):
  '''Returns 'text' with <, > and & (and optionally ") escaped to named HTML
  entities.  Any existing named entity or HTML entity defined by decimal or
  hex code will be left untouched.  This is appropriate for escaping text for
  inclusion in HTML, but not for XML.

  Args:
    text: the string to escape
    escape_quotes: if True, '"' is replaced by '&quot;' as well

  Return:
    The escaped string.
  '''
  def Replace(match):
    char = match.group()
    if char == '&':
      return '&amp;'
    if char == '<':
      return '&lt;'
    if char == '>':
      return '&gt;'
    if char == '"':
      # Quotes always match the pattern but are only rewritten on request.
      return '&quot;' if escape_quotes else char
    assert False, 'unexpected match %r' % char

  return _HTML_CHARS_TO_ESCAPE.sub(Replace, text)


def UnescapeHtml(text, replace_nbsp=True):
  '''Returns 'text' with all HTML character entities (both named character
  entities and those specified by decimal or hexadecimal Unicode ordinal)
  replaced by their Unicode characters (or latin1 characters if possible).

  The only exception is that &nbsp; will not be escaped if 'replace_nbsp' is
  False.

  Args:
    text: the string to unescape
    replace_nbsp: whether '&nbsp;' is replaced like any other named entity

  Return:
    The unescaped string; unknown named entities are left untouched.
  '''
  def Replace(match):
    groups = match.groupdict()
    if groups['hex']:
      return unichr(int(groups['hex'], 16))
    if groups['decimal']:
      return unichr(int(groups['decimal'], 10))
    name = groups['named']
    if name == 'nbsp' and not replace_nbsp:
      return match.group()  # Don't replace &nbsp;
    assert name is not None
    if name in htmlentitydefs.name2codepoint:
      return unichr(htmlentitydefs.name2codepoint[name])
    return match.group()  # Unknown HTML character entity - don't replace

  return _HTML_ENTITY.sub(Replace, text)


def EncodeCdata(cdata):
  '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>
  format, depending on which is more appropriate for easy editing.  The data
  is escaped for inclusion in an XML element's body.

  Args:
    cdata: 'If x < y and y < z then x < z'

  Return:
    '<![CDATA[If x < y and y < z then x < z]]>'
  '''
  # A CDATA section cannot contain ']]>', so such text is always escaped.  The
  # parentheses matter: 'and' binds tighter than 'or'.
  has_many_brackets = cdata.count('<') > 1 or cdata.count('>') > 1
  if has_many_brackets and ']]>' not in cdata:
    return '<![CDATA[%s]]>' % cdata
  return saxutils.escape(cdata)


def FixupNamedParam(function, param_name, param_value):
  '''Returns a closure that is identical to 'function' but ensures that the
  named parameter 'param_name' is always set to 'param_value' unless explicitly
  set by the caller.

  Args:
    function: callable
    param_name: 'bingo'
    param_value: 'bongo' (any type)

  Return:
    callable
  '''
  def FixupClosure(*args, **kw):
    kw.setdefault(param_name, param_value)
    return function(*args, **kw)
  return FixupClosure


def PathFromRoot(path):
  r'''Takes a path relative to the root directory for GRIT (the one that
  grit.py resides in) and returns a path that is either absolute or relative
  to the current working directory (i.e. a path you can use to open the file).

  Args:
    path: 'rel_dir\file.ext'

  Return:
    'c:\src\tools\rel_dir\file.ext'
  '''
  return os.path.normpath(os.path.join(_root_dir, path))


def ParseGrdForUnittest(body, base_dir=None):
  '''Parse a skeleton .grd file and return it, for use in unit tests.

  Args:
    body: XML that goes inside the <release> element.
    base_dir: The base_dir attribute of the <grit> tag.  Defaults to the GRIT
              root directory.
  '''
  import StringIO
  from grit import grd_reader
  if isinstance(body, unicode):
    body = body.encode('utf-8')
  if base_dir is None:
    base_dir = PathFromRoot('.')
  body = '''<?xml version="1.0" encoding="UTF-8"?>
<grit latest_public_release="2" current_release="3" source_lang_id="en" base_dir="%s">
  <outputs>
  </outputs>
  <release seq="3">
    %s
  </release>
</grit>''' % (base_dir, body)
  return grd_reader.Parse(StringIO.StringIO(body), dir=".")


def StripBlankLinesAndComments(text):
  '''Strips blank lines and comments from C source code, for unit tests.

  Only lines that start with '//' in the first column count as comments.
  '''
  return '\n'.join(line for line in text.splitlines()
                   if line and not line.startswith('//'))


def dirname(filename):
  '''Version of os.path.dirname() that never returns empty paths (returns
  '.' if the result of os.path.dirname() is empty).
  '''
  return os.path.dirname(filename) or '.'


def normpath(path):
  '''Version of os.path.normpath that also changes backward slashes to
  forward slashes when not running on Windows.
  '''
  # This is safe to always do because the Windows version of os.path.normpath
  # will replace forward slashes with backward slashes.
  path = path.replace('\\', '/')
  return os.path.normpath(path)


_LANGUAGE_SPLIT_RE = lazy_re.compile('-|_|/')


def CanonicalLanguage(code):
  '''Canonicalizes two-part language codes by using a dash and making the
  second part upper case.  Returns one-part language codes unchanged.

  Args:
    code: 'zh_cn'

  Return:
    code: 'zh-CN'
  '''
  parts = _LANGUAGE_SPLIT_RE.split(code)
  # The first part is kept verbatim; every later part is upper-cased.
  return '-'.join([parts[0]] + [part.upper() for part in parts[1:]])


_LANG_TO_CODEPAGE = {
  'en' : 1252,
  'fr' : 1252,
  'it' : 1252,
  'de' : 1252,
  'es' : 1252,
  'nl' : 1252,
  'sv' : 1252,
  'no' : 1252,
  'da' : 1252,
  'fi' : 1252,
  'pt-BR' : 1252,
  'ru' : 1251,
  'ja' : 932,
  'zh-TW' : 950,
  'zh-CN' : 936,
  'ko' : 949,
}


def LanguageToCodepage(lang):
  '''Returns the codepage _number_ that can be used to represent 'lang', which
  may be either in formats such as 'en', 'pt_br', 'pt-BR', etc.

  The codepage returned will be one of the 'cpXXXX' codepage numbers.  For a
  language that is not in the table a notice is printed and 1252 is returned.

  Args:
    lang: 'de'

  Return:
    1252
  '''
  lang = CanonicalLanguage(lang)
  if lang in _LANG_TO_CODEPAGE:
    return _LANG_TO_CODEPAGE[lang]
  print("Not sure which codepage to use for %s, assuming cp1252" % lang)
  return 1252


def NewClassInstance(class_name, class_type):
  '''Returns an instance of the class specified in classname

  Args:
    class_name: the fully qualified, dot separated package + classname,
    i.e. "my.package.name.MyClass". Short class names are not supported.
    class_type: the class or superclass this object must implement

  Return:
    An instance of the class, or None if none was found
  '''
  lastdot = class_name.rfind('.')
  if lastdot < 0:
    return None  # Short class name: there is no module to import.
  module_name = class_name[:lastdot]
  if not module_name:
    return None  # Name started with a dot.
  class_name = class_name[lastdot + 1:]
  # A non-empty fromlist makes __import__ return the leaf module rather than
  # the top-level package.
  module = __import__(module_name, globals(), locals(), [''])
  if hasattr(module, class_name):
    class_instance = getattr(module, class_name)()
    if isinstance(class_instance, class_type):
      return class_instance
  return None


def FixLineEnd(text, line_end):
  '''Returns 'text' with every linebreak (CRLF, LF or CR) replaced by
  'line_end'.
  '''
  # First normalize
  text = text.replace('\r\n', '\n')
  text = text.replace('\r', '\n')
  # Then fix
  text = text.replace('\n', line_end)
  return text


def BoolToString(bool):
  '''Returns 'true' if the argument is truthy, 'false' otherwise.'''
  if bool:
    return 'true'
  else:
    return 'false'


verbose = False
extra_verbose = False


def IsVerbose():
  '''Returns the current value of the module-level 'verbose' flag.'''
  return verbose


def IsExtraVerbose():
  '''Returns the current value of the module-level 'extra_verbose' flag.'''
  return extra_verbose


def ParseDefine(define):
  '''Parses a define argument and returns the name and value.

  The format is either "NAME=VAL" or "NAME", using True as the default value.
  Values of "1" and "0" are transformed to True and False respectively.

  Args:
    define: a string of the form "NAME=VAL" or "NAME".

  Returns:
    A (name, value) pair. name is a string, value a string or boolean.
  '''
  parts = [part.strip() for part in define.split('=', 1)]
  assert len(parts) >= 1
  name = parts[0]
  val = True
  if len(parts) > 1:
    val = parts[1]
  if val == "1":
    val = True
  elif val == "0":
    val = False
  return (name, val)


class Substituter(object):
  '''Finds and substitutes variable names in text strings.

  Given a dictionary of variable names and values, prepares to
  search for patterns of the form [VAR_NAME] in a text.
  The value will be substituted back efficiently.
  Also applies to tclib.Message objects.
  '''

  def __init__(self):
    '''Create an empty substituter.'''
    self.substitutions_ = {}
    # True whenever the cached regular expression is out of date.
    self.dirty_ = True

  def AddSubstitutions(self, subs):
    '''Add new values to the substitutor.

    Args:
      subs: A dictionary of new substitutions.
    '''
    self.substitutions_.update(subs)
    self.dirty_ = True

  def AddMessages(self, messages, lang):
    '''Adds substitutions extracted from node.Message objects.

    Args:
      messages: a list of node.Message objects.
      lang: The translation language to use in substitutions.
    '''
    subs = [(str(msg.attrs['name']), msg.Translate(lang)) for msg in messages]
    # AddSubstitutions() also marks the cached expression as dirty.
    self.AddSubstitutions(dict(subs))

  def GetExp(self):
    '''Obtain a regular expression that will find substitution keys in text.

    Create and cache if the substituter has been updated. Use the cached value
    otherwise. Keys will be enclosed in [square brackets] in text.

    Returns:
      A regular expression object.
    '''
    if self.dirty_:
      # Keys are escaped so that they only ever match themselves literally;
      # _SubFragment() looks the matched text up verbatim.
      components = [r'\[%s\]' % re.escape('%s' % (k,))
                    for k in self.substitutions_]
      # The capturing group makes split() keep the matched keys.
      self.exp = re.compile("(%s)" % ('|'.join(components),))
      self.dirty_ = False
    return self.exp

  def Substitute(self, text):
    '''Substitute the variable values in the given text.

    Text of the form [message_name] will be replaced by the message's value.

    Args:
      text: A string of text.

    Returns:
      A string of text with substitutions done.
    '''
    return ''.join([self._SubFragment(f) for f in self.GetExp().split(text)])

  def _SubFragment(self, fragment):
    '''Utility function for Substitute.

    Performs a simple substitution if the fragment is exactly of the form
    [message_name].

    Args:
      fragment: A simple string.

    Returns:
      A string with the substitution done.
    '''
    if len(fragment) > 2 and fragment[0] == '[' and fragment[-1] == ']':
      sub = self.substitutions_.get(fragment[1:-1], None)
      if sub is not None:
        return sub
    return fragment

  def SubstituteMessage(self, msg):
    '''Apply substitutions to a tclib.Message object.

    Text of the form [message_name] will be replaced by a new placeholder,
    whose presentation will take the form the message_name_{UsageCount}, and
    whose example will be the message's value. Existing placeholders are
    not affected.

    Args:
      msg: A tclib.Message object.

    Returns:
      A tclib.Message object, with substitutions done.
    '''
    from grit import tclib  # avoid circular import
    counts = {}
    text = msg.GetPresentableContent()
    placeholders = []
    newtext = ''
    for f in self.GetExp().split(text):
      sub = self._SubFragment(f)
      if f != sub:
        f = str(f)
        # Number each use of a key so every placeholder name is unique.
        count = counts.get(f, 0) + 1
        counts[f] = count
        name = "%s_%d" % (f[1:-1], count)
        placeholders.append(tclib.Placeholder(name, f, sub))
        newtext += name
      else:
        newtext += f
    if placeholders:
      return tclib.Message(newtext, msg.GetPlaceholders() + placeholders,
                           msg.GetDescription(), msg.GetMeaning())
    else:
      return msg


class TempDir(object):
  '''Creates files with the specified contents in a temporary directory,
  for unit testing.

  Can be used as a context manager, in which case the directory is removed
  on exit.
  '''
  def __init__(self, file_data):
    '''Creates the temporary directory and populates it.

    Args:
      file_data: a dictionary mapping file paths (relative to the temporary
                 directory) to the contents to write to them.
    '''
    self._tmp_dir_name = tempfile.mkdtemp()
    assert not os.listdir(self.GetPath())
    for name, contents in file_data.items():
      file_path = self.GetPath(name)
      dir_path = os.path.split(file_path)[0]
      if not os.path.exists(dir_path):
        os.makedirs(dir_path)
      with open(file_path, 'w') as f:
        f.write(contents)

  def __enter__(self):
    return self

  def __exit__(self, *exc_info):
    self.CleanUp()

  def CleanUp(self):
    '''Removes the temporary directory and everything in it.'''
    shutil.rmtree(self.GetPath())

  def GetPath(self, name=''):
    '''Returns the path of 'name' inside the temporary directory (the
    directory itself if 'name' is empty).
    '''
    name = os.path.join(self._tmp_dir_name, name)
    # os.path.join() discards the directory if 'name' is absolute.
    assert name.startswith(self._tmp_dir_name)
    return name

  def AsCurrentDir(self):
    '''Returns a context manager that makes the temporary directory the
    current working directory and restores the previous one on exit.
    '''
    return self._AsCurrentDirClass(self.GetPath())

  class _AsCurrentDirClass(object):
    def __init__(self, path):
      self.path = path
    def __enter__(self):
      self.oldpath = os.getcwd()
      os.chdir(self.path)
    def __exit__(self, *exc_info):
      os.chdir(self.oldpath)