1# Copyright (c) 2012 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4import sys 5import os 6import re 7 8class DepsException(Exception): 9 pass 10 11""" 12The core of this script is the calc_load_sequence function. In total, this 13walks over the provided javascript files and figures out their dependencies 14using the module definitions provided in each file. This allows us to, for 15example, have a trio of modules: 16 17foo.js: 18 base.require('bar'); 19and bar.js: 20 base.require('baz'); 21 22calc_load_sequence(['foo'], '.') will yield: 23 [Module('baz'), Module('bar'), Module('foo')] 24 25which is, based on the dependencies, the correct sequence in which to load 26those modules. 27""" 28 29class ResourceFinder(object): 30 """Helper code for finding a module given a name and current module. 31 32 The dependency resolution code in Module.resolve will find bits of code in the 33 actual javascript that says things require('bar'). This 34 code is responsible for figuring out what filename corresponds to 'bar' given 35 a Module('foo'). 36 """ 37 def __init__(self, root_dir): 38 self._root_dir = root_dir 39 pass 40 41 @property 42 def root_dir(self): 43 return self._root_dir 44 45 def _find_and_load_filename(self, absolute_path): 46 if not os.path.exists(absolute_path): 47 return None, None 48 49 f = open(absolute_path, 'r') 50 contents = f.read() 51 f.close() 52 53 return absolute_path, contents 54 55 def _find_and_load(self, current_module, requested_name, extension): 56 assert current_module.filename 57 pathy_name = requested_name.replace(".", os.sep) 58 filename = pathy_name + extension 59 absolute_path = os.path.join(self._root_dir, filename) 60 return self._find_and_load_filename(absolute_path) 61 62 def find_and_load_module(self, current_module, requested_module_name): 63 return self._find_and_load(current_module, requested_module_name, ".js") 64 65 def find_and_load_raw_script(self, current_module, filename): 66 absolute_path = os.path.join(self._root_dir, filename) 67 return self._find_and_load_filename(absolute_path) 68 69 def find_and_load_style_sheet(self, 70 current_module, requested_style_sheet_name): 71 return self._find_and_load( 72 current_module, requested_style_sheet_name, ".css") 73 74 75class StyleSheet(object): 76 """Represents a stylesheet resource referenced by a module via the 77 base.requireStylesheet(xxx) directive.""" 78 def __init__(self, name, filename, contents): 79 self.name = name 80 self.filename = filename 81 self.contents = contents 82 83 def __repr__(self): 84 return "StyleSheet(%s)" % self.name 85 86class RawScript(object): 87 """Represents a raw script resource referenced by a module via the 88 base.requireRawScript(xxx) directive.""" 89 def __init__(self, name, filename, contents): 90 self.name = name 91 self.filename = filename 92 self.contents = contents 93 94 def __repr__(self): 95 return "RawScript(%s)" % self.name 96 97def _tokenize_js(text): 98 rest = text 99 tokens = ["//", "/*", "*/", "\n"] 100 while len(rest): 101 indices = [rest.find(token) for token in tokens] 102 found_indices = [index for index in indices if index >= 0] 103 104 if len(found_indices) == 0: 105 # end of string 106 yield rest 107 return 108 109 min_index = min(found_indices) 110 token_with_min = tokens[indices.index(min_index)] 111 112 if min_index > 0: 113 yield rest[:min_index] 114 115 yield rest[min_index:min_index + len(token_with_min)] 116 rest = rest[min_index + len(token_with_min):] 117 118def _strip_js_comments(text): 119 result_tokens = [] 120 token_stream = _tokenize_js(text).__iter__() 121 while True: 122 try: 123 t = token_stream.next() 124 except StopIteration: 125 break 126 127 if t == "//": 128 while True: 129 try: 130 t2 = token_stream.next() 131 if t2 == "\n": 132 break 133 except StopIteration: 134 break 135 elif t == '/*': 136 nesting = 1 137 while True: 138 try: 139 t2 = token_stream.next() 140 if t2 == "/*": 141 nesting += 1 142 elif t2 == "*/": 143 nesting -= 1 144 if nesting == 0: 145 break 146 except StopIteration: 147 break 148 else: 149 result_tokens.append(t) 150 return "".join(result_tokens) 151 152def _MangleRawScriptFilenameToModuleName(filename): 153 name = filename 154 name = name.replace(os.sep, ':') 155 name = name.replace('..', '!!') 156 return name 157 158class Module(object): 159 """Represents a javascript module. It can either be directly requested, e.g. 160 passed in by name to calc_load_sequence, or created by being referenced a 161 module via the base.require(xxx) directive. 162 163 Interesting properties on this object are: 164 165 - filename: the file of the actual module 166 - contents: the actual text contents of the module 167 - style_sheets: StyleSheet objects that this module relies on for styling 168 information. 169 - dependent_modules: other modules that this module needs in order to run 170 """ 171 def __init__(self, name = None): 172 self.name = name 173 self.filename = None 174 self.contents = None 175 176 self.dependent_module_names = [] 177 self.dependent_modules = [] 178 self.dependent_raw_script_names = [] 179 self.dependent_raw_scripts = [] 180 self.style_sheet_names = [] 181 self.style_sheets = [] 182 183 def __repr__(self): 184 return "Module(%s)" % self.name 185 186 def load_and_parse(self, module_filename, 187 module_contents = None, 188 decl_required = True): 189 if not module_contents: 190 f = open(module_filename, 'r') 191 self.contents = f.read() 192 f.close() 193 else: 194 self.contents = module_contents 195 self.filename = module_filename 196 stripped_text = _strip_js_comments(self.contents) 197 self.validate_uses_strict_mode_(stripped_text) 198 self.parse_definition_(stripped_text, decl_required) 199 200 def resolve(self, all_resources, resource_finder): 201 if "scripts" not in all_resources: 202 all_resources["scripts"] = {} 203 if "style_sheets" not in all_resources: 204 all_resources["style_sheets"] = {} 205 if "raw_scripts" not in all_resources: 206 all_resources["raw_scripts"] = {} 207 208 assert self.filename 209 210 for name in self.dependent_module_names: 211 if name in all_resources["scripts"]: 212 assert all_resources["scripts"][name].contents 213 self.dependent_modules.append(all_resources["scripts"][name]) 214 continue 215 216 filename, contents = resource_finder.find_and_load_module(self, name) 217 if not filename: 218 raise DepsException("No file for module %(name)s needed by %(dep)s" % 219 {"name": name, "dep": self.filename}) 220 221 module = Module(name) 222 all_resources["scripts"][name] = module 223 self.dependent_modules.append(module) 224 try: 225 module.load_and_parse(filename, contents) 226 except Exception, e: 227 raise Exception('While processing ' + filename + ': ' + e.message) 228 module.resolve(all_resources, resource_finder) 229 230 for name in self.dependent_raw_script_names: 231 filename, contents = resource_finder.find_and_load_raw_script(self, name) 232 if not filename: 233 raise DepsException("Could not find a file for raw script %s" % name) 234 235 if name in all_resources["raw_scripts"]: 236 assert all_resources["raw_scripts"][name].contents 237 self.dependent_raw_scripts.append(all_resources["raw_scripts"][name]) 238 continue 239 240 raw_script = RawScript(name, filename, contents) 241 all_resources["raw_scripts"][name] = raw_script 242 self.dependent_raw_scripts.append(raw_script) 243 244 for name in self.style_sheet_names: 245 if name in all_resources["style_sheets"]: 246 assert all_resources["style_sheets"][name].contents 247 self.style_sheets.append(all_resources["style_sheets"][name]) 248 continue 249 250 filename, contents = resource_finder.find_and_load_style_sheet(self, name) 251 if not filename: 252 raise DepsException("Could not find a file for stylesheet %s" % name) 253 254 style_sheet = StyleSheet(name, filename, contents) 255 all_resources["style_sheets"][name] = style_sheet 256 self.style_sheets.append(style_sheet) 257 258 def compute_load_sequence_recursive(self, load_sequence, already_loaded_set): 259 for dependent_module in self.dependent_modules: 260 dependent_module.compute_load_sequence_recursive(load_sequence, 261 already_loaded_set) 262 if self.name not in already_loaded_set: 263 already_loaded_set.add(self.name) 264 load_sequence.append(self) 265 266 def validate_uses_strict_mode_(self, stripped_text): 267 lines = stripped_text.split('\n') 268 for line in lines: 269 line = line.strip() 270 if len(line.strip()) == 0: 271 continue 272 if line.strip() == """'use strict';""": 273 break 274 raise DepsException('%s must use strict mode' % self.name) 275 276 def parse_definition_(self, stripped_text, decl_required = True): 277 if not decl_required and not self.name: 278 raise Exception("Module.name must be set for decl_required to be false.") 279 280 rest = stripped_text 281 while True: 282 # Things to search for. 283 m_r = re.search("""base\s*\.\s*require\((["'])(.+?)\\1\)""", 284 rest, re.DOTALL) 285 m_s = re.search("""base\s*\.\s*requireStylesheet\((["'])(.+?)\\1\)""", 286 rest, re.DOTALL) 287 m_irs = re.search("""base\s*\.\s*requireRawScript\((["'])(.+?)\\1\)""", 288 rest, re.DOTALL) 289 matches = [m for m in [m_r, m_s, m_irs] if m] 290 291 # Figure out which was first. 292 matches.sort(key=lambda x: x.start()) 293 if len(matches): 294 m = matches[0] 295 else: 296 break 297 298 if m == m_r: 299 dependent_module_name = m.group(2) 300 if '/' in dependent_module_name: 301 raise DepsException("Slashes are not allowed in module names. " 302 "Use '.' instead: %s" % dependent_module_name) 303 if dependent_module_name.endswith('js'): 304 raise DepsException("module names shouldn't end with .js" 305 "The module system will append that for you: %s" % 306 dependent_module_name) 307 self.dependent_module_names.append(dependent_module_name) 308 elif m == m_s: 309 style_sheet_name = m.group(2) 310 if '/' in style_sheet_name: 311 raise DepsException("Slashes are not allowed in style sheet names. " 312 "Use '.' instead: %s" % style_sheet_name) 313 if style_sheet_name.endswith('.css'): 314 raise DepsException("Style sheets should not end in .css. " 315 "The module system will append that for you" % 316 style_sheet_name) 317 self.style_sheet_names.append(style_sheet_name) 318 elif m == m_irs: 319 name = m.group(2) 320 self.dependent_raw_script_names.append(name) 321 322 rest = rest[m.end():] 323 324 325def calc_load_sequence(filenames, toplevel_dir): 326 """Given a list of starting javascript files, figure out all the Module 327 objects that need to be loaded to satisfiy their dependencies. 328 329 The javascript files shoud specify their dependencies in a format that is 330 textually equivalent to base.js' require syntax, namely: 331 332 base.require(module1); 333 base.require(module2); 334 base.requireStylesheet(stylesheet); 335 336 The output of this function is an array of Module objects ordered by 337 dependency. 338 """ 339 all_resources = {} 340 all_resources["scripts"] = {} 341 resource_finder = ResourceFinder(os.path.abspath(toplevel_dir)) 342 initial_module_name_indices = {} 343 for filename in filenames: 344 if not os.path.exists(filename): 345 raise Exception("Could not find %s" % filename) 346 347 rel_filename = os.path.relpath(filename, toplevel_dir) 348 dirname = os.path.dirname(rel_filename) 349 modname = os.path.splitext(os.path.basename(rel_filename))[0] 350 if len(dirname): 351 name = dirname.replace('/', '.') + '.' + modname 352 else: 353 name = modname 354 355 if name in all_resources["scripts"]: 356 continue 357 358 module = Module(name) 359 initial_module_name_indices[module.name] = len(initial_module_name_indices) 360 module.load_and_parse(filename, decl_required = False) 361 all_resources["scripts"][module.name] = module 362 module.resolve(all_resources, resource_finder) 363 364 # Find the root modules: ones that have no dependencies. While doing that, 365 # sort the dependent module list so that the computed load order is stable. 366 module_ref_counts = {} 367 for module in all_resources["scripts"].values(): 368 module.dependent_modules.sort(lambda x, y: cmp(x.name, y.name)) 369 module_ref_counts[module.name] = 0 370 371 def inc_ref_count(name): 372 module_ref_counts[name] = module_ref_counts[name] + 1 373 for module in all_resources["scripts"].values(): 374 for dependent_module in module.dependent_modules: 375 inc_ref_count(dependent_module.name) 376 377 root_modules = [all_resources["scripts"][name] 378 for name, ref_count in module_ref_counts.items() 379 if ref_count == 0] 380 381 # Sort root_modules by the order they were originally requested, 382 # then sort everything else by name. 383 def compare_root_module(x, y): 384 n = len(initial_module_name_indices); 385 iX = initial_module_name_indices.get(x.name, n) 386 iY = initial_module_name_indices.get(y.name, n) 387 if cmp(iX, iY) != 0: 388 return cmp(iX, iY) 389 return cmp(x.name, y.name) 390 root_modules.sort(compare_root_module) 391 392 already_loaded_set = set() 393 load_sequence = [] 394 for module in root_modules: 395 module.compute_load_sequence_recursive(load_sequence, already_loaded_set) 396 return load_sequence 397