#!/usr/bin/env python
#
# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This is a utility for converting JavaScript source code into C-style
# char arrays. It is used for embedded JavaScript code in the V8
# library.
import optparse
import os
import re
import textwrap

import jsmin


class Error(Exception):
    """Raised for any problem found while processing the native sources."""

    def __init__(self, msg):
        Exception.__init__(self, msg)


def ToCArray(byte_sequence):
    """Render a byte sequence as a comma-separated list of decimal values.

    Args:
      byte_sequence: A str, bytes or bytearray holding the data.

    Returns:
      The decimal character codes joined by ", " and wrapped at 80 columns,
      suitable for use as a C array initializer.
    """
    values = []
    for item in byte_sequence:
        # Iterating bytes/bytearray can yield ints directly; iterating a
        # string yields one-character strings that need ord().
        values.append(str(item if isinstance(item, int) else ord(item)))
    return textwrap.fill(", ".join(values), 80)


def RemoveCommentsEmptyLinesAndWhitespace(lines):
    """Strip comments, blank lines and leading/trailing whitespace.

    The substitutions are applied in a fixed order; each one works on the
    output of the previous one.
    """
    lines = re.sub(r'\n+', '\n', lines)  # empty lines
    lines = re.sub(r'//.*\n', '\n', lines)  # end-of-line comments
    lines = re.sub(re.compile(r'/\*.*?\*/', re.DOTALL), '', lines)  # comments.
    lines = re.sub(r'\s+\n', '\n', lines)  # trailing whitespace
    lines = re.sub(r'\n\s+', '\n', lines)  # initial whitespace
    return lines


def ReadFile(filename):
    """Return the whole content of `filename`, read in text mode."""
    with open(filename, "rt") as source:
        return source.read()


EVAL_PATTERN = re.compile(r'\beval\s*\(')
WITH_PATTERN = re.compile(r'\bwith\s*\(')
INVALID_ERROR_MESSAGE_PATTERN = re.compile(
    r'Make(?!Generic)\w*Error\(([kA-Z]\w+)')
NEW_ERROR_PATTERN = re.compile(r'new \$\w*Error\((?!\))')


def Validate(lines):
    """Reject constructs that are not allowed in the natives files.

    Returns:
      `lines`, unchanged, so that the function can be used as a filter.

    Raises:
      Error: if the text uses eval, a with statement, an error message
        template name that was not expanded, or constructs an error without
        a message template.
    """
    # Because of simplified context setup, eval and with is not
    # allowed in the natives files.
    if EVAL_PATTERN.search(lines):
        raise Error("Eval disallowed in natives.")
    if WITH_PATTERN.search(lines):
        raise Error("With statements disallowed in natives.")
    invalid_error = INVALID_ERROR_MESSAGE_PATTERN.search(lines)
    if invalid_error:
        raise Error("Unknown error message template '%s'" %
                    invalid_error.group(1))
    if NEW_ERROR_PATTERN.search(lines):
        raise Error("Error constructed without message template.")
    # Pass lines through unchanged.
    return lines


def ExpandConstants(lines, constants):
    """Substitute every (compiled pattern, value) pair of `constants`."""
    for key, value in constants:
        lines = key.sub(str(value), lines)
    return lines


def ExpandMacroDefinition(lines, pos, name_pattern, macro, expander):
    """Expand every use of one macro in `lines`, starting at offset `pos`.

    Args:
      lines: The source text.
      pos: Offset at which the search for macro uses starts.
      name_pattern: Compiled pattern matching the macro name followed by '('.
      macro: Object with an `args` list and an `expand(mapping)` method.
      expander: Function applied to each (stripped) argument text before it
        is substituted into the macro body.

    Returns:
      The text with all uses of the macro replaced by their expansion.

    Raises:
      Error: if a use passes more arguments than the macro declares, or
        fails the (lenient) too-few-arguments check below.
    """
    pattern_match = name_pattern.search(lines, pos)
    while pattern_match is not None:
        start = pattern_match.start()
        end = pattern_match.end()
        assert lines[end - 1] == '('
        arg_values = []

        def add_arg(text):
            # Remember to expand recursively in the arguments
            if len(arg_values) >= len(macro.args):
                # The text was read in text mode, so line breaks are always
                # '\n' regardless of the platform's os.linesep.
                lineno = lines.count('\n', 0, start) + 1
                raise Error('line %s: Too many arguments for macro "%s"' %
                            (lineno, name_pattern.pattern))
            arg_values.append(expander(text.strip()))

        # Scan over the arguments, tracking the bracket nesting depth.
        height = 1
        last_match = end
        while end < len(lines) and height > 0:
            char = lines[end]
            # We don't count commas at higher nesting levels.
            if char == ',' and height == 1:
                add_arg(lines[last_match:end])
                last_match = end + 1
            elif char in '({[':
                height += 1
            elif char in ')}]':
                height -= 1
            end += 1
        # Remember to add the last match.
        add_arg(lines[last_match:end - 1])
        # NOTE(review): this tolerates a use that is exactly one argument
        # short; it looks like an off-by-one but is kept as-is because
        # tightening it could reject existing sources -- confirm.
        if len(arg_values) < len(macro.args) - 1:
            lineno = lines.count('\n', 0, start) + 1
            raise Error('line %s: Too few arguments for macro "%s"' %
                        (lineno, name_pattern.pattern))
        mapping = dict(zip(macro.args, arg_values))
        result = macro.expand(mapping)
        # Replace the occurrence of the macro with the expansion
        lines = lines[:start] + result + lines[end:]
        pattern_match = name_pattern.search(lines, start + len(result))
    return lines


def ExpandMacros(lines, macros):
    """Expand all `macros` (pairs of name pattern and macro) in `lines`."""
    def expander(s):
        return ExpandMacros(s, macros)

    # We allow macros to depend on the previously declared macros, but
    # we don't allow self-dependencies or recursion.
    for name_pattern, macro in reversed(macros):
        lines = ExpandMacroDefinition(lines, 0, name_pattern, macro, expander)
    return lines


class TextMacro(object):
    """A macro whose body is plain text with named argument placeholders."""

    def __init__(self, args, body):
        self.args = args
        self.body = body

    def expand(self, mapping):
        """Return the body with each key of `mapping` replaced by its value."""
        # Keys could be substrings of earlier values. To avoid unintended
        # clobbering, apply all replacements simultaneously.
        any_key_pattern = "|".join(re.escape(k) for k in mapping)

        def replace(match):
            return mapping[match.group(0)]
        return re.sub(any_key_pattern, replace, self.body)


CONST_PATTERN = re.compile(r'^define\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
MACRO_PATTERN = re.compile(
    r'^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')


def ReadMacros(lines):
    """Parse the content of a macro file.

    Text after '#' on a line is ignored, as are blank lines. Every other
    line must be a `define NAME = value;` or `macro NAME(args) = body;`.

    Returns:
      A (constants, macros) tuple. `constants` is a list of
      (compiled pattern, value) pairs; `macros` is a list of
      (compiled pattern, TextMacro) pairs.

    Raises:
      Error: if a line is neither a constant nor a macro definition.
    """
    constants = []
    macros = []
    for line in lines.split('\n'):
        comment_start = line.find('#')
        if comment_start != -1:
            line = line[:comment_start]
        line = line.strip()
        if not line:
            continue
        const_match = CONST_PATTERN.match(line)
        if const_match:
            name = const_match.group(1)
            value = const_match.group(2).strip()
            constants.append((re.compile("\\b%s\\b" % name), value))
            continue
        macro_match = MACRO_PATTERN.match(line)
        if not macro_match:
            raise Error("Illegal line: " + line)
        name = macro_match.group(1)
        args = [arg.strip() for arg in macro_match.group(2).split(',')]
        body = macro_match.group(3).strip()
        macros.append((re.compile("\\b%s\\(" % name), TextMacro(args, body)))
    return (constants, macros)


TEMPLATE_PATTERN = re.compile(r'^\s+T\(([A-Z][a-zA-Z0-9]*),')


def ReadMessageTemplates(lines):
    """Map each `T(Name, ...)` entry to its zero-based position.

    Returns:
      A list of (compiled pattern matching `kName`, index) pairs, in the
      order in which the entries appear in `lines`.
    """
    templates = []
    for line in lines.split('\n'):
        template_match = TEMPLATE_PATTERN.match(line)
        if template_match:
            name = "k%s" % template_match.group(1)
            # The index of a template is the number of templates before it.
            templates.append((re.compile("\\b%s\\b" % name), len(templates)))
    return templates


INLINE_MACRO_PATTERN = re.compile(
    r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')


def ExpandInlineMacros(lines):
    """Expand `macro NAME(args) ... endmacro` definitions found in `lines`.

    Each definition is removed from the text and then expanded in the text
    that follows the place where it was defined.

    Raises:
      Error: if a macro definition has no matching `endmacro`.
    """
    def non_expander(s):
        return s

    pos = 0
    while True:
        macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
        if macro_match is None:
            # no more macros
            return lines
        name = macro_match.group(1)
        args = [arg.strip() for arg in macro_match.group(2).split(',')]
        end_macro_match = INLINE_MACRO_END_PATTERN.search(
            lines, macro_match.end())
        if end_macro_match is None:
            raise Error("Macro %s unclosed" % name)
        body = lines[macro_match.end():end_macro_match.start()]

        # remove macro definition
        lines = lines[:macro_match.start()] + lines[end_macro_match.end():]
        name_pattern = re.compile("\\b%s\\(" % name)
        macro = TextMacro(args, body)

        # advance position to where the macro definition was
        pos = macro_match.start()
        lines = ExpandMacroDefinition(lines, pos, name_pattern, macro,
                                      non_expander)


INLINE_CONSTANT_PATTERN = re.compile(
    r'define\s+([a-zA-Z0-9_]+)\s*=\s*([^;\n]+);\n')


def ExpandInlineConstants(lines):
    """Expand `define NAME = value;` definitions found in `lines`.

    Each definition is removed and the name is replaced by its value in the
    text that follows the definition.
    """
    pos = 0
    while True:
        const_match = INLINE_CONSTANT_PATTERN.search(lines, pos)
        if const_match is None:
            # no more constants
            return lines
        name = const_match.group(1)
        replacement = const_match.group(2)
        name_pattern = re.compile("\\b%s\\b" % name)

        # remove constant definition and replace
        lines = (lines[:const_match.start()] +
                 re.sub(name_pattern, replacement, lines[const_match.end():]))

        # advance position to where the constant definition was
        pos = const_match.start()


HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.

// This file was generated from .js source files by GYP.  If you
// want to make changes to this file you should either change the
// javascript source files or the GYP script.

#include "src/v8.h"
#include "src/snapshot/natives.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

%(sources_declaration)s\

  template <>
  int NativesCollection<%(type)s>::GetBuiltinsCount() {
    return %(builtin_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetDebuggerCount() {
    return %(debugger_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetIndex(const char* name) {
%(get_index_cases)s\
    return -1;
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptSource(int index) {
%(get_script_source_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
%(get_script_name_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptsSource() {
    return Vector<const char>(sources, %(total_length)i);
  }
}  // internal
}  // v8
"""

SOURCES_DECLARATION = """\
  static const char sources[] = { %s };
"""


GET_INDEX_CASE = """\
    if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""


GET_SCRIPT_SOURCE_CASE = """\
    if (index == %(i)i) return Vector<const char>(sources + %(offset)i, %(source_length)i);
"""


GET_SCRIPT_NAME_CASE = """\
    if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""


def BuildFilterChain(macro_filename, message_template_file):
    """Build the chain of filter functions to be applied to the sources.

    Args:
      macro_filename: Name of the macro file, if any.
      message_template_file: Name of the message template file, if any.

    Returns:
      A function (string -> string) that processes a source file.
    """
    filter_chain = []

    if macro_filename:
        (consts, macros) = ReadMacros(ReadFile(macro_filename))
        filter_chain.append(lambda l: ExpandMacros(l, macros))
        filter_chain.append(lambda l: ExpandConstants(l, consts))

    if message_template_file:
        message_templates = ReadMessageTemplates(
            ReadFile(message_template_file))
        filter_chain.append(lambda l: ExpandConstants(l, message_templates))

    filter_chain.extend([
        RemoveCommentsEmptyLinesAndWhitespace,
        ExpandInlineMacros,
        ExpandInlineConstants,
        Validate,
        jsmin.JavaScriptMinifier().JSMinify
    ])

    def apply_filters(text):
        # Run the filters in the order in which they were registered.
        for source_filter in filter_chain:
            text = source_filter(text)
        return text

    return apply_filters


def BuildExtraFilterChain():
    """Return the filter for extras: validate, then strip comments."""
    return lambda x: RemoveCommentsEmptyLinesAndWhitespace(Validate(x))


class Sources:
    """Parallel lists describing the prepared source modules."""

    def __init__(self):
        self.names = []           # Module names (file name minus extension).
        self.modules = []         # Processed source text of each module.
        self.is_debugger_id = []  # Whether each module is a debugger source.


def IsDebuggerFile(filename):
    """Return True if the file lives directly in a directory named 'debug'."""
    return os.path.basename(os.path.dirname(filename)) == "debug"


def IsMacroFile(filename):
    """Return True if the file name ends with 'macros.py'."""
    return filename.endswith("macros.py")


def IsMessageTemplateFile(filename):
    """Return True if the file name ends with 'messages.h'."""
    return filename.endswith("messages.h")


def PrepareSources(source_files, native_type, emit_js):
    """Read, prepare and assemble the list of source files.

    Args:
      source_files: List of JavaScript-ish source files. A file named
          macros.py will be treated as a list of macros, and a file named
          messages.h as the list of message templates. The list itself is
          not modified.
      native_type: String corresponding to a NativeType enum value, allowing
          us to treat different types of sources differently.
      emit_js: True if we should skip the byte conversion and just leave the
          sources as JS strings. (Not consulted by this function.)

    Returns:
      An instance of Sources.

    Raises:
      Error: if one of the sources fails a filter; the message names the
        offending file.
    """
    # Work on a copy so that the caller's list is left untouched.
    source_files = list(source_files)

    macro_file = None
    macro_files = [f for f in source_files if IsMacroFile(f)]
    assert len(macro_files) in [0, 1]
    if macro_files:
        source_files.remove(macro_files[0])
        macro_file = macro_files[0]

    message_template_file = None
    message_template_files = [f for f in source_files
                              if IsMessageTemplateFile(f)]
    assert len(message_template_files) in [0, 1]
    if message_template_files:
        source_files.remove(message_template_files[0])
        message_template_file = message_template_files[0]

    if native_type in ("EXTRAS", "EXPERIMENTAL_EXTRAS"):
        filters = BuildExtraFilterChain()
    else:
        filters = BuildFilterChain(macro_file, message_template_file)

    # Sort 'debugger' sources first. The sort is stable, so the relative
    # order inside each group is preserved.
    source_files = sorted(source_files, key=lambda f: not IsDebuggerFile(f))

    source_files_and_contents = [(f, ReadFile(f)) for f in source_files]

    # Have a single not-quite-empty source file if there are none present;
    # otherwise you get errors trying to compile an empty C++ array.
    # It cannot be empty (or whitespace, which gets trimmed to empty), as
    # the deserialization code assumes each file is nonempty.
    if not source_files_and_contents:
        source_files_and_contents = [("dummy.js", "(function() {})")]

    result = Sources()

    for (source, contents) in source_files_and_contents:
        try:
            lines = filters(contents)
        except Error as e:
            raise Error("In file %s:\n%s" % (source, str(e)))

        result.modules.append(lines)
        result.is_debugger_id.append(IsDebuggerFile(source))
        # Drop the three-character extension (".js").
        result.names.append(os.path.basename(source)[:-3])

    return result


def BuildMetadata(sources, source_bytes, native_type):
    """Build the meta data required to generate a libraries file.

    Args:
      sources: A Sources instance with the prepared sources.
      source_bytes: A list of source bytes.
          (The concatenation of all sources; might be compressed.)
      native_type: The parameter for the NativesCollection template.

    Returns:
      A dictionary for use with HEADER_TEMPLATE.
    """
    total_length = len(source_bytes)
    raw_sources = "".join(sources.modules)

    # The sources are expected to be ASCII-only.
    assert all(ord(char) < 128 for char in raw_sources)

    # Loop over modules and build up indices into the source blob:
    get_index_cases = []
    get_script_name_cases = []
    get_script_source_cases = []
    offset = 0
    for i, (name, module) in enumerate(zip(sources.names, sources.modules)):
        native_name = "native %s.js" % name
        d = {
            "i": i,
            "id": name,
            "name": native_name,
            "length": len(native_name),
            "offset": offset,
            "source_length": len(module),
        }
        get_index_cases.append(GET_INDEX_CASE % d)
        get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
        get_script_source_cases.append(GET_SCRIPT_SOURCE_CASE % d)
        offset += len(module)
    assert offset == len(raw_sources)

    metadata = {
        "builtin_count": len(sources.modules),
        "debugger_count": sum(sources.is_debugger_id),
        "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
        "total_length": total_length,
        "get_index_cases": "".join(get_index_cases),
        "get_script_source_cases": "".join(get_script_source_cases),
        "get_script_name_cases": "".join(get_script_name_cases),
        "type": native_type,
    }
    return metadata


def PutInt(blob_file, value):
    """Write `value` to `blob_file` as a 1-4 byte little-endian integer.

    The two lowest bits of the first byte hold the number of bytes minus
    one; the remaining bits hold the value. `value` must be in [0, 2**28).
    """
    assert 0 <= value < (1 << 28)
    if value < 1 << 6:
        size = 1
    elif value < 1 << 14:
        size = 2
    elif value < 1 << 22:
        size = 3
    else:
        size = 4
    value_with_length = (value << 2) | (size - 1)

    byte_sequence = bytearray()
    for _ in range(size):
        byte_sequence.append(value_with_length & 255)
        value_with_length >>= 8

    blob_file.write(byte_sequence)


def PutStr(blob_file, value):
    """Write `value` to `blob_file`, preceded by its length in bytes."""
    # The blob file is binary; text has to be encoded before it is written.
    if not isinstance(value, bytes):
        value = value.encode("utf-8")
    PutInt(blob_file, len(value))
    blob_file.write(value)


def WriteStartupBlob(sources, startup_blob):
    """Write a startup blob, as expected by V8 Initialize ...
      TODO(vogelheim): Add proper method name.

    The debugger sources are written first, then the remaining ones; each
    group is preceded by its count.

    Args:
      sources: A Sources instance with the prepared sources.
      startup_blob: Name of file to write the blob to.
    """
    debug_sources = sum(sources.is_debugger_id)
    with open(startup_blob, "wb") as output:
        PutInt(output, debug_sources)
        for i in range(debug_sources):
            PutStr(output, sources.names[i])
            PutStr(output, sources.modules[i])

        PutInt(output, len(sources.names) - debug_sources)
        for i in range(debug_sources, len(sources.names)):
            PutStr(output, sources.names[i])
            PutStr(output, sources.modules[i])


def JS2C(sources, target, native_type, raw_file, startup_blob, emit_js):
    """Process the sources and write the requested output files.

    Args:
      sources: List of source file names (see PrepareSources).
      target: Name of the file to generate.
      native_type: The parameter for the NativesCollection template.
      raw_file: If set, name of a file receiving the processed sources.
      startup_blob: If set, name of a file receiving the startup blob.
      emit_js: If true, `target` receives the processed JS instead of C++.
    """
    prepared_sources = PrepareSources(sources, native_type, emit_js)
    sources_output = "".join(prepared_sources.modules)
    metadata = BuildMetadata(prepared_sources, sources_output, native_type)

    # Optionally emit raw file.
    if raw_file:
        with open(raw_file, "w") as output:
            output.write(sources_output)

    if startup_blob:
        WriteStartupBlob(prepared_sources, startup_blob)

    # Emit resulting source file.
    with open(target, "w") as output:
        if emit_js:
            output.write(sources_output)
        else:
            output.write(HEADER_TEMPLATE % metadata)


def main():
    """Parse the command line and run the conversion."""
    parser = optparse.OptionParser()
    parser.add_option("--raw",
                      help="file to write the processed sources array to.")
    parser.add_option("--startup_blob",
                      help="file to write the startup blob to.")
    parser.add_option("--js",
                      help="writes a JS file output instead of a C file",
                      action="store_true", default=False, dest='js')
    parser.add_option("--nojs", action="store_false", default=False,
                      dest='js')
    parser.set_usage("""js2c out.cc type sources.js ...
        out.cc: C code to be generated.
        type: type parameter for NativesCollection template.
        sources.js: JS internal sources or macros.py.""")
    (options, args) = parser.parse_args()
    if len(args) < 2:
        # Report a usage error instead of failing with an IndexError.
        parser.error("expected at least the arguments out.cc and type")
    JS2C(args[2:],
         args[0],
         args[1],
         options.raw,
         options.startup_blob,
         options.js)


if __name__ == "__main__":
    main()