• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# Copyright 2012 the V8 project authors. All rights reserved.
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9#       notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11#       copyright notice, this list of conditions and the following
12#       disclaimer in the documentation and/or other materials provided
13#       with the distribution.
14#     * Neither the name of Google Inc. nor the names of its
15#       contributors may be used to endorse or promote products derived
16#       from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30# This is a utility for converting JavaScript source code into C-style
31# char arrays. It is used for embedded JavaScript code in the V8
32# library.
33
34import os, re
35import optparse
36import jsmin
37import textwrap
38
39
class Error(Exception):
  """Exception type raised by this script's processing steps."""

  def __init__(self, msg):
    # Hand the message to the base class so str(error) returns it.
    super(Error, self).__init__(msg)
43
44
def ToCArray(byte_sequence):
  """Render a sequence of characters as the body of a C array initializer.

  Each element is converted with ord() and written as a decimal number.
  The comma-separated list is then wrapped to 80 columns.
  """
  codes = [str(ord(char)) for char in byte_sequence]
  return textwrap.fill(", ".join(codes), 80)
51
52
def RemoveCommentsEmptyLinesAndWhitespace(lines):
  """Strip comments, blank lines and line-edge whitespace from source text.

  The rewrites are purely textual (regular expressions) and are applied in
  the order listed below; each one runs on the output of the previous one.
  """
  block_comment = re.compile(r'/\*.*?\*/', re.DOTALL)
  rewrites = (
      (r'\n+', '\n'),       # empty lines
      (r'//.*\n', '\n'),    # end-of-line comments
      (block_comment, ''),  # block comments, possibly spanning lines
      (r'\s+\n', '\n'),     # trailing whitespace
      (r'\n\s+', '\n'),     # initial whitespace
  )
  for pattern, replacement in rewrites:
    lines = re.sub(pattern, replacement, lines)
  return lines
60
61
def ReadFile(filename):
  """Return the entire contents of filename, opened in text mode."""
  # The context manager closes the handle even if read() raises.
  with open(filename, "rt") as source:
    return source.read()
69
70
# Patterns for constructs that Validate() rejects.
EVAL_PATTERN = re.compile(r'\beval\s*\(')
WITH_PATTERN = re.compile(r'\bwith\s*\(')
INVALID_ERROR_MESSAGE_PATTERN = re.compile(
    r'Make(?!Generic)\w*Error\(([kA-Z]\w+)')
NEW_ERROR_PATTERN = re.compile(r'new \$\w*Error\((?!\))')


def Validate(lines):
  """Reject source text containing constructs not allowed in natives.

  Args:
    lines: The source text to check.

  Returns:
    The input text, unchanged, when no check fails.

  Raises:
    Error: If the text calls eval, uses a with statement, passes an
        identifier starting with 'k' or an upper-case letter as the first
        argument of a Make...Error( call (MakeGeneric... is exempt), or
        constructs a `new $...Error(` with arguments.
  """
  # Because of simplified context setup, eval and with are not allowed in
  # the natives files.
  if EVAL_PATTERN.search(lines) is not None:
    raise Error("Eval disallowed in natives.")
  if WITH_PATTERN.search(lines) is not None:
    raise Error("With statements disallowed in natives.")

  bad_template = INVALID_ERROR_MESSAGE_PATTERN.search(lines)
  if bad_template is not None:
    raise Error("Unknown error message template '%s'" % bad_template.group(1))

  if NEW_ERROR_PATTERN.search(lines) is not None:
    raise Error("Error constructed without message template.")

  # Nothing objectionable found: hand the text back untouched.
  return lines
91
92
def ExpandConstants(lines, constants):
  """Substitute every constant into the source text.

  Args:
    lines: The source text.
    constants: Iterable of (compiled pattern, value) pairs; each value is
        converted with str() and used as the replacement for its pattern.

  Returns:
    The text after all substitutions, applied in the given order.
  """
  for pattern, replacement in constants:
    lines = pattern.sub(str(replacement), lines)
  return lines
97
98
def ExpandMacroDefinition(lines, pos, name_pattern, macro, expander):
  """Expand every call of one macro found in lines at or after pos.

  Args:
    lines: The source text to process.
    pos: Offset at which the search for macro calls starts.
    name_pattern: Compiled regex matching the macro name followed by its
        opening parenthesis; every match must end on that '('.
    macro: Object with an `args` list of parameter names and an
        `expand(mapping)` method that returns the replacement text.
    expander: Function applied to each (stripped) argument text before it
        is stored in the mapping handed to macro.expand().

  Returns:
    The text with each matched call replaced by its expansion.

  Raises:
    Error: If a call supplies more arguments than the macro declares, or
        fails the "too few arguments" check below.
  """
  def line_number(offset):
    # The text is read in text mode, so line breaks are always '\n'
    # regardless of the platform's os.linesep.
    return lines.count('\n', 0, offset) + 1

  pattern_match = name_pattern.search(lines, pos)
  while pattern_match is not None:
    # Scan over the arguments
    height = 1
    start = pattern_match.start()
    end = pattern_match.end()
    assert lines[end - 1] == '('
    last_match = end
    arg_index = [0]  # Wrap state into array, to work around Python "scoping"
    mapping = { }
    def add_arg(text):
      # Remember to expand recursively in the arguments
      if arg_index[0] >= len(macro.args):
        raise Error('line %s: Too many arguments for macro "%s"' % (line_number(start), name_pattern.pattern))
      replacement = expander(text.strip())
      mapping[macro.args[arg_index[0]]] = replacement
      arg_index[0] += 1
    while end < len(lines) and height > 0:
      # We don't count commas at higher nesting levels.
      if lines[end] == ',' and height == 1:
        add_arg(lines[last_match:end])
        last_match = end + 1
      elif lines[end] in ['(', '{', '[']:
        height = height + 1
      elif lines[end] in [')', '}', ']']:
        height = height - 1
      end = end + 1
    # Remember to add the last match.
    add_arg(lines[last_match:end-1])
    # NOTE(review): with the "- 1" a call that is short by exactly one
    # argument is not reported; kept as-is because existing sources may
    # rely on it -- confirm before tightening.
    if arg_index[0] < len(macro.args) -1:
      raise Error('line %s: Too few arguments for macro "%s"' % (line_number(start), name_pattern.pattern))
    result = macro.expand(mapping)
    # Replace the occurrence of the macro with the expansion
    lines = lines[:start] + result + lines[end:]
    # Continue after the inserted text so the expansion is not rescanned.
    pattern_match = name_pattern.search(lines, start + len(result))
  return lines
138
def ExpandMacros(lines, macros):
  """Expand all macros in the source text.

  Args:
    lines: The source text.
    macros: Sequence of (name pattern, macro) pairs in declaration order.

  Returns:
    The text with every macro call expanded.
  """
  def expand_argument(text):
    # Arguments may themselves contain macro calls.
    return ExpandMacros(text, macros)

  # We allow macros to depend on the previously declared macros, but
  # we don't allow self-dependencies or recursion; hence the reverse order.
  for name_pattern, macro in reversed(macros):
    lines = ExpandMacroDefinition(lines, 0, name_pattern, macro,
                                  expand_argument)
  return lines
147
class TextMacro:
  """A macro whose expansion is a textual substitution into a fixed body."""

  def __init__(self, args, body):
    # args: list of parameter names; body: text the parameters appear in.
    self.args = args
    self.body = body

  def expand(self, mapping):
    """Return the body with every key of mapping replaced by its value.

    Args:
      mapping: Dict from parameter name to replacement text.

    Returns:
      The expanded body; the body itself when mapping is empty.
    """
    if not mapping:
      # An empty alternation would match the empty string everywhere.
      return self.body
    # Keys could be substrings of earlier values. To avoid unintended
    # clobbering, apply all replacements simultaneously.
    # Iterating the dict directly works on both Python 2 and Python 3.
    any_key_pattern = "|".join(re.escape(k) for k in mapping)
    def replace(match):
      return mapping[match.group(0)]
    return re.sub(any_key_pattern, replace, self.body)
159
# One definition per line: "define NAME = VALUE;" or "macro NAME(ARGS) = BODY;".
CONST_PATTERN = re.compile(r'^define\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
MACRO_PATTERN = re.compile(r'^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')


def ReadMacros(lines):
  """Parse the text of a macro file into constants and macros.

  Text from '#' to the end of a line is ignored, as are blank lines.

  Args:
    lines: The full text of the macro file.

  Returns:
    A (constants, macros) tuple. constants is a list of
    (compiled word-boundary pattern, value string) pairs; macros is a list
    of (compiled "NAME(" pattern, TextMacro) pairs. Both are in file order.

  Raises:
    Error: If a non-empty line is neither a constant nor a macro definition.
  """
  constants = []
  macros = []
  for line in lines.split('\n'):
    comment_start = line.find('#')
    if comment_start != -1:
      line = line[:comment_start]
    line = line.strip()
    if not line:
      continue

    const_match = CONST_PATTERN.match(line)
    if const_match:
      name = const_match.group(1)
      value = const_match.group(2).strip()
      constants.append((re.compile("\\b%s\\b" % name), value))
      continue

    macro_match = MACRO_PATTERN.match(line)
    if macro_match:
      name = macro_match.group(1)
      args = [match.strip() for match in macro_match.group(2).split(',')]
      body = macro_match.group(3).strip()
      macros.append((re.compile("\\b%s\\(" % name), TextMacro(args, body)))
      continue

    raise Error("Illegal line: " + line)
  return (constants, macros)
187
188
# An indented "T(Name," entry; group 1 is the template name.
TEMPLATE_PATTERN = re.compile(r'^\s+T\(([A-Z][a-zA-Z0-9]*),')

def ReadMessageTemplates(lines):
  """Collect message template names and number them in order of appearance.

  Args:
    lines: The text to scan, one candidate entry per line.

  Returns:
    A list of (compiled pattern for "k<Name>" as a whole word, index) pairs,
    where index counts matching lines starting from 0.
  """
  templates = []
  for line in lines.split('\n'):
    found = TEMPLATE_PATTERN.match(line)
    if found is None:
      continue
    # The number of templates seen so far is this template's index.
    ordinal = len(templates)
    templates.append((re.compile("\\bk%s\\b" % found.group(1)), ordinal))
  return templates
202
# "macro NAME(ARGS)" opens an inline macro; "endmacro" closes it.
INLINE_MACRO_PATTERN = re.compile(r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')

def ExpandInlineMacros(lines):
  """Expand macros that are defined inside the source text itself.

  Each "macro NAME(ARGS) ... endmacro" definition is cut out of the text
  and its calls are expanded from the point where the definition stood.

  Args:
    lines: The source text.

  Returns:
    The text with inline macro definitions removed and calls expanded.

  Raises:
    Error: If a macro definition has no matching endmacro.
  """
  def non_expander(s):
    # Arguments of inline macro calls are used as written.
    return s

  pos = 0
  macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
  while macro_match is not None:
    name = macro_match.group(1)
    args = [piece.strip() for piece in macro_match.group(2).split(',')]
    end_macro_match = INLINE_MACRO_END_PATTERN.search(lines, macro_match.end())
    if end_macro_match is None:
      raise Error("Macro %s unclosed" % name)
    body = lines[macro_match.end():end_macro_match.start()]

    # remove macro definition
    lines = lines[:macro_match.start()] + lines[end_macro_match.end():]
    name_pattern = re.compile("\\b%s\\(" % name)
    macro = TextMacro(args, body)

    # advance position to where the macro definition was
    pos = macro_match.start()
    lines = ExpandMacroDefinition(lines, pos, name_pattern, macro, non_expander)

    macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
  # no more macros
  return lines
231
232
# A "define NAME = VALUE;" statement terminated by a newline.
INLINE_CONSTANT_PATTERN = re.compile(r'define\s+([a-zA-Z0-9_]+)\s*=\s*([^;\n]+);\n')

def ExpandInlineConstants(lines):
  """Expand constants that are defined inside the source text itself.

  Each definition is removed and the constant's name (as a whole word) is
  replaced by its value in the text that follows the definition.

  Args:
    lines: The source text.

  Returns:
    The text with inline constant definitions removed and uses replaced.
  """
  pos = 0
  const_match = INLINE_CONSTANT_PATTERN.search(lines, pos)
  while const_match is not None:
    name, replacement = const_match.group(1, 2)
    name_pattern = re.compile("\\b%s\\b" % name)

    # remove constant definition and replace
    before = lines[:const_match.start()]
    after = lines[const_match.end():]
    lines = before + name_pattern.sub(replacement, after)

    # advance position to where the constant definition was
    pos = const_match.start()
    const_match = INLINE_CONSTANT_PATTERN.search(lines, pos)
  # no more constants
  return lines
252
253
# Skeleton of the generated C++ file. It is filled with the %-operator from
# a dict providing: sources_declaration, type, builtin_count,
# debugger_count, get_index_cases, get_script_source_cases,
# get_script_name_cases and total_length.
HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.

// This file was generated from .js source files by GYP.  If you
// want to make changes to this file you should either change the
// javascript source files or the GYP script.

#include "src/v8.h"
#include "src/snapshot/natives.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

%(sources_declaration)s\

  template <>
  int NativesCollection<%(type)s>::GetBuiltinsCount() {
    return %(builtin_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetDebuggerCount() {
    return %(debugger_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetIndex(const char* name) {
%(get_index_cases)s\
    return -1;
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptSource(int index) {
%(get_script_source_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
%(get_script_name_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptsSource() {
    return Vector<const char>(sources, %(total_length)i);
  }
}  // internal
}  // v8
"""
305
# Declaration of the C array holding all sources; %s is the array body.
SOURCES_DECLARATION = """\
  static const char sources[] = { %s };
"""


# One line of GetIndex(): maps the name %(id)s to the index %(i)i.
GET_INDEX_CASE = """\
    if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""


# One line of GetScriptSource(): the slice of `sources` for index %(i)i.
GET_SCRIPT_SOURCE_CASE = """\
    if (index == %(i)i) return Vector<const char>(sources + %(offset)i, %(source_length)i);
"""


# One line of GetScriptName(): the name string and its length for %(i)i.
GET_SCRIPT_NAME_CASE = """\
    if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""
324
325
def BuildFilterChain(macro_filename, message_template_file):
  """Build the chain of filter functions to be applied to the sources.

  Args:
    macro_filename: Name of the macro file, if any.
    message_template_file: Name of the message template file, if any.

  Returns:
    A function (string -> string) that processes a source file.
  """
  filter_chain = []

  if macro_filename:
    (consts, macros) = ReadMacros(ReadFile(macro_filename))
    filter_chain.append(lambda l: ExpandMacros(l, macros))
    filter_chain.append(lambda l: ExpandConstants(l, consts))

  if message_template_file:
    message_templates = ReadMessageTemplates(ReadFile(message_template_file))
    filter_chain.append(lambda l: ExpandConstants(l, message_templates))

  filter_chain.extend([
    RemoveCommentsEmptyLinesAndWhitespace,
    ExpandInlineMacros,
    ExpandInlineConstants,
    Validate,
    jsmin.JavaScriptMinifier().JSMinify
  ])

  def apply_filters(source):
    # Run the filters in list order, feeding each the previous output.
    # An explicit loop avoids the builtin reduce(), which only exists on
    # Python 2.
    for source_filter in filter_chain:
      source = source_filter(source)
    return source

  return apply_filters
358
def BuildExtraFilterChain():
  """Return the filter used for extras: validate, then strip comments."""
  def extra_filter(source):
    validated = Validate(source)
    return RemoveCommentsEmptyLinesAndWhitespace(validated)
  return extra_filter
361
class Sources:
  """Three parallel lists describing the prepared source files."""

  def __init__(self):
    # Entry i of each list belongs to the same source file.
    self.names, self.modules, self.is_debugger_id = [], [], []
367
368
def IsDebuggerFile(filename):
  """Return True if the file's immediate parent directory is named "debug"."""
  parent_path = os.path.split(filename)[0]
  parent_name = os.path.split(parent_path)[1]
  return parent_name == "debug"
371
def IsMacroFile(filename):
  """Return True if the file name ends with "macros.py"."""
  macro_suffix = "macros.py"
  return filename.endswith(macro_suffix)
374
def IsMessageTemplateFile(filename):
  """Return True if the file name ends with "messages.h"."""
  template_suffix = "messages.h"
  return filename.endswith(template_suffix)
377
378
def PrepareSources(source_files, native_type, emit_js):
  """Read, prepare and assemble the list of source files.

  Args:
    source_files: List of JavaScript-ish source files. A file named macros.py
        will be treated as a list of macros, and a file named messages.h as
        the list of message templates. The list itself is not modified.
    native_type: String corresponding to a NativeType enum value, allowing us
        to treat different types of sources differently.
    emit_js: Accepted for the caller's convenience; this function does not
        read it.

  Returns:
    An instance of Sources.

  Raises:
    Error: If a filter rejects a source file; the message is prefixed with
        the name of the offending file.
  """
  # Work on a copy so the removals below do not touch the caller's list.
  source_files = list(source_files)

  # List comprehensions (rather than filter()) give a real list on both
  # Python 2 and Python 3, so len() and truth tests behave the same.
  macro_file = None
  macro_files = [f for f in source_files if IsMacroFile(f)]
  assert len(macro_files) in [0, 1]
  if macro_files:
    source_files.remove(macro_files[0])
    macro_file = macro_files[0]

  message_template_file = None
  message_template_files = [f for f in source_files
                            if IsMessageTemplateFile(f)]
  assert len(message_template_files) in [0, 1]
  if message_template_files:
    source_files.remove(message_template_files[0])
    message_template_file = message_template_files[0]

  filters = None
  if native_type in ("EXTRAS", "EXPERIMENTAL_EXTRAS"):
    filters = BuildExtraFilterChain()
  else:
    filters = BuildFilterChain(macro_file, message_template_file)

  # Sort 'debugger' sources first. The sort is stable, so the relative
  # order inside each group is kept; False sorts before True.
  source_files = sorted(source_files, key=lambda f: not IsDebuggerFile(f))

  source_files_and_contents = [(f, ReadFile(f)) for f in source_files]

  # Have a single not-quite-empty source file if there are none present;
  # otherwise you get errors trying to compile an empty C++ array.
  # It cannot be empty (or whitespace, which gets trimmed to empty), as
  # the deserialization code assumes each file is nonempty.
  if not source_files_and_contents:
    source_files_and_contents = [("dummy.js", "(function() {})")]

  result = Sources()

  for (source, contents) in source_files_and_contents:
    try:
      lines = filters(contents)
    except Error as e:
      raise Error("In file %s:\n%s" % (source, str(e)))

    result.modules.append(lines)

    is_debugger = IsDebuggerFile(source)
    result.is_debugger_id.append(is_debugger)

    # Drop the last three characters of the base name (the ".js" suffix).
    name = os.path.basename(source)[:-3]
    result.names.append(name)

  return result
443
444
def BuildMetadata(sources, source_bytes, native_type):
  """Build the meta data required to generate a libraries file.

  Args:
    sources: A Sources instance with the prepared sources.
    source_bytes: A list of source bytes.
        (The concatenation of all sources; might be compressed.)
    native_type: The parameter for the NativesCollection template.

  Returns:
    A dictionary for use with HEADER_TEMPLATE.
  """
  total_length = len(source_bytes)
  raw_sources = "".join(sources.modules)

  # The sources are expected to be ASCII-only. all() is used because on
  # Python 3 filter() returns an iterator, which is always truthy.
  assert all(ord(char) < 128 for char in raw_sources)

  # Loop over modules and build up indices into the source blob:
  get_index_cases = []
  get_script_name_cases = []
  get_script_source_cases = []
  offset = 0
  for i, module in enumerate(sources.modules):
    native_name = "native %s.js" % sources.names[i]
    d = {
        "i": i,
        "id": sources.names[i],
        "name": native_name,
        "length": len(native_name),
        "offset": offset,
        "source_length": len(module),
    }
    get_index_cases.append(GET_INDEX_CASE % d)
    get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
    get_script_source_cases.append(GET_SCRIPT_SOURCE_CASE % d)
    offset += len(module)
  # Every character of every module must have been accounted for.
  assert offset == len(raw_sources)

  metadata = {
    "builtin_count": len(sources.modules),
    "debugger_count": sum(sources.is_debugger_id),
    "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
    "total_length": total_length,
    "get_index_cases": "".join(get_index_cases),
    "get_script_source_cases": "".join(get_script_source_cases),
    "get_script_name_cases": "".join(get_script_name_cases),
    "type": native_type,
  }
  return metadata
495
496
def PutInt(blob_file, value):
  """Write a non-negative integer below 2**28 in a 1-4 byte encoding.

  The value is shifted left by two bits and (size - 1) is stored in the two
  low bits; the result is written least significant byte first.

  Args:
    blob_file: Binary file-like object to write to.
    value: Integer with 0 <= value < (1 << 28).
  """
  assert(value >= 0 and value < (1 << 28))
  if (value < 1 << 6):
    size = 1
  elif (value < 1 << 14):
    size = 2
  elif (value < 1 << 22):
    size = 3
  else:
    size = 4
  value_with_length = (value << 2) | (size - 1)

  byte_sequence = bytearray()
  # range() exists on both Python 2 and Python 3 (xrange does not).
  for _ in range(size):
    byte_sequence.append(value_with_length & 255)
    value_with_length >>= 8
  blob_file.write(byte_sequence)
514
515
def PutStr(blob_file, value):
  """Write a string as its length (via PutInt) followed by its bytes.

  Args:
    blob_file: Binary file-like object to write to.
    value: The string to write. Text that is not already a byte string is
        encoded as UTF-8 first, so the length written is a byte count.
  """
  # On Python 2 str is bytes and passes through unchanged; on Python 3 a
  # str must be encoded before it can be written to a binary file.
  if not isinstance(value, bytes):
    value = value.encode("utf-8")
  PutInt(blob_file, len(value))
  blob_file.write(value)
519
520
def WriteStartupBlob(sources, startup_blob):
  """Write a startup blob, as expected by V8 Initialize ...
    TODO(vogelheim): Add proper method name.

  The blob holds the number of debugger sources followed by their
  (name, source) pairs, then the number of remaining sources followed by
  their (name, source) pairs.

  Args:
    sources: A Sources instance with the prepared sources.
    startup_blob: Name of file to write the blob to.
  """
  debug_sources = sum(sources.is_debugger_id)
  source_count = len(sources.names)

  # The context manager closes the file even if a write fails.
  with open(startup_blob, "wb") as output:
    # Debugger sources occupy the first debug_sources entries.
    PutInt(output, debug_sources)
    for i in range(debug_sources):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])

    PutInt(output, source_count - debug_sources)
    for i in range(debug_sources, source_count):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])
543
544
def JS2C(sources, target, native_type, raw_file, startup_blob, emit_js):
  """Process the source files and write the requested outputs.

  Args:
    sources: List of source file names, as accepted by PrepareSources.
    target: Name of the main output file.
    native_type: Type parameter for the NativesCollection template.
    raw_file: If set, name of a file that receives the processed sources.
    startup_blob: If set, name of a file that receives the startup blob.
    emit_js: If true, target receives the processed sources themselves
        instead of the text generated from HEADER_TEMPLATE.
  """
  prepared_sources = PrepareSources(sources, native_type, emit_js)
  sources_output = "".join(prepared_sources.modules)
  metadata = BuildMetadata(prepared_sources, sources_output, native_type)

  # Optionally emit raw file.
  if raw_file:
    with open(raw_file, "w") as output:
      output.write(sources_output)

  if startup_blob:
    WriteStartupBlob(prepared_sources, startup_blob)

  # Emit resulting source file. The context manager closes the file even
  # if formatting or writing fails.
  with open(target, "w") as output:
    if emit_js:
      output.write(sources_output)
    else:
      output.write(HEADER_TEMPLATE % metadata)
566
567
def main():
  """Parse the command line and run JS2C.

  Positional arguments are the output file, the native type and then any
  number of source files; see the usage text below.
  """
  parser = optparse.OptionParser()
  parser.add_option("--raw",
                    help="file to write the processed sources array to.")
  parser.add_option("--startup_blob",
                    help="file to write the startup blob to.")
  parser.add_option("--js",
                    help="writes a JS file output instead of a C file",
                    action="store_true", default=False, dest='js')
  parser.add_option("--nojs", action="store_false", default=False, dest='js')
  parser.set_usage("""js2c out.cc type sources.js ...
        out.cc: C code to be generated.
        type: type parameter for NativesCollection template.
        sources.js: JS internal sources or macros.py.""")
  (options, args) = parser.parse_args()
  if len(args) < 2:
    # Report a usage error instead of failing with an IndexError below.
    parser.error("expected at least the output file and the type")
  JS2C(args[2:],
       args[0],
       args[1],
       options.raw,
       options.startup_blob,
       options.js)


if __name__ == "__main__":
  main()
593