#!/usr/bin/env python
#
# Copyright 2006-2008 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
This is a utility for converting JavaScript source code into uint8_t[] or
uint16_t[] arrays that are used for embedding JavaScript code into the
Node.js binary.
"""
import argparse
import os
import re
import functools
import codecs
import utils


def ReadFile(filename):
    """Return the whole content of `filename`, decoded as UTF-8.

    When the module-level `is_verbose` flag is set, the file name is printed
    before the file is read.
    """
    if is_verbose:
        print(filename)
    with codecs.open(filename, "r", "utf-8") as f:
        return f.read()


# Skeleton of the generated C++ file. Placeholders:
#   {0} - the array definitions, {1} - the source map initializers,
#   {2} - the number of elements in the `config_raw` array.
TEMPLATE = """
#include "env-inl.h"
#include "node_builtins.h"
#include "node_internals.h"

namespace node {{

namespace builtins {{

{0}

namespace {{
const ThreadsafeCopyOnWrite<BuiltinSourceMap> global_source_map {{
  BuiltinSourceMap{{ {1} }}
}};
}}

void BuiltinLoader::LoadJavaScriptSource() {{
  source_ = global_source_map;
}}

UnionBytes BuiltinLoader::GetConfig() {{
  return UnionBytes(config_raw, {2}); // config.gypi
}}

}} // namespace builtins

}} // namespace node
"""

# Array definition used when every character of the source is ASCII.
ONE_BYTE_STRING = """
static const uint8_t {0}[] = {{
{1}
}};
"""

# Array definition used when the source contains a non-ASCII character.
TWO_BYTE_STRING = """
static const uint16_t {0}[] = {{
{1}
}};
"""

# One source map entry: {0} - module id, {1} - array name, {2} - array length.
INITIALIZER = '{{"{0}", UnionBytes{{{1}, {2}}} }},'

CONFIG_GYPI_ID = 'config_raw'

# Characters of a module id that are replaced by `_` to form a C++ identifier.
SLUGGER_RE = re.compile(r'[.\-/]')

# Set from the `--verbose` command line flag in `main()`.
is_verbose = False


def GetDefinition(var, source, step=30):
    """Build the C++ array definition holding `source`.

    `var` is the name of the generated array and `step` the maximum number of
    elements emitted per output line. A `uint8_t` array is produced when all
    characters are ASCII, otherwise a `uint16_t` array of UTF-16 code units.

    Returns a `(definition, length)` tuple where `length` is the number of
    elements in the array.
    """
    template = ONE_BYTE_STRING
    code_points = [ord(c) for c in source]
    if any(c > 127 for c in code_points):
        template = TWO_BYTE_STRING
        # The source is not pure ASCII: encode it as UTF-16 Little Endian and
        # combine each pair of bytes into one 16-bit array element.
        encoded_source = bytearray(source, 'utf-16le')
        code_points = [
            encoded_source[i] + (encoded_source[i + 1] * 256)
            for i in range(0, len(encoded_source), 2)
        ]

    # For easier debugging, align to the common 3 char for code-points.
    elements_s = ['%3s' % x for x in code_points]
    # Put no more than `step` code-points in a line.
    slices = [elements_s[i:i + step] for i in range(0, len(elements_s), step)]
    lines = [','.join(s) for s in slices]
    array_content = ',\n'.join(lines)
    definition = template.format(var, array_content)

    return definition, len(code_points)


def AddModule(filename, definitions, initializers):
    """Read `filename` and append its generated C++ pieces to the two lists.

    The array definition is appended to `definitions` and the matching source
    map entry to `initializers`; both lists are modified in place.
    """
    code = ReadFile(filename)
    name = NormalizeFileName(filename)
    # The module id may contain `.`, `-` and `/`; the array name may not.
    var = SLUGGER_RE.sub('_', name) + '_raw'
    definition, size = GetDefinition(var, code)
    definitions.append(definition)
    initializers.append(INITIALIZER.format(name, var, size))


def NormalizeFileName(filename):
    """Turn a `/`-separated source path into a module id without extension.

    Paths starting with `deps` get an `internal` prefix, e.g.
    `deps/foo/bar.js` -> `internal/deps/foo/bar`. Any other path loses its
    first component, e.g. `lib/fs.js` -> `fs`. A path with a single component
    is kept and only loses its extension.
    """
    split = filename.split('/')
    if split[0] == 'deps':
        split = ['internal'] + split
    else:  # `lib/**/*.js` so drop the 'lib' part
        split = split[1:]
    if split:
        filename = '/'.join(split)
    return os.path.splitext(filename)[0]


def JS2C(source_files, target):
    """Generate the C++ file `target` from the given sources.

    `source_files` is a dict with the keys `.js` and `.mjs` (lists of file
    names) and `config.gypi` (a single file name).
    """
    # Build source code lines
    definitions = []
    initializers = []

    for ext in ('.js', '.mjs'):
        for filename in source_files[ext]:
            AddModule(filename, definitions, initializers)

    config_def, config_size = handle_config_gypi(source_files['config.gypi'])
    definitions.append(config_def)

    # Emit result
    out = TEMPLATE.format(
        ''.join(definitions), '\n '.join(initializers), config_size)
    write_if_changed(out, target)


def handle_config_gypi(config_filename):
    """Return the `(definition, length)` of the array holding config.gypi.

    The file content is converted with `jsonify()` before being embedded.
    """
    # if its a gypi file we're going to want it as json
    # later on anyway, so get it out of the way now
    config = jsonify(ReadFile(config_filename))
    return GetDefinition(CONFIG_GYPI_ID, config)


def jsonify(config):
    """Convert the text of a gypi file into JSON text using regex rewrites."""
    # 1. strip comments
    config = re.sub(r'#.*?\n', '', config)
    # 2. join multiline strings
    config = re.sub(r"'$\s+'", '', config, flags=re.M)
    # 3. normalize string literals from ' into "
    config = re.sub('\'', '"', config)
    # 4. turn pseudo-booleans strings into Booleans
    config = re.sub('"true"', 'true', config)
    config = re.sub('"false"', 'false', config)
    return config


def write_if_changed(content, target):
    """Write `content` to `target` unless the file already holds it.

    When the existing file is identical it is not rewritten; only its access
    and modification times are set to the current time. A missing target is
    always created, even when `content` is empty.
    """
    if os.path.exists(target):
        with open(target, 'rt') as existing:
            unchanged = existing.read() == content
        if unchanged:
            os.utime(target, None)
            return
    with open(target, "wt") as output:
        output.write(content)


# Backward-compatible alias for the original, misspelled name.
write_if_chaged = write_if_changed


def SourceFileByExt(files_by_ext, filename):
    """Add `filename` to the list stored under its extension.

    Reducer used with `functools.reduce`; `files_by_ext` is modified in place
    and returned.

    :type files_by_ext: dict
    :type filename: str
    :rtype: dict
    """
    ext = os.path.splitext(filename)[-1]
    files_by_ext.setdefault(ext, []).append(filename)
    return files_by_ext


def main():
    """Parse the command line, collect the sources and generate the target."""
    global is_verbose

    parser = argparse.ArgumentParser(
        description='Convert code files into `uint16_t[]`s',
        fromfile_prefix_chars='@'
    )
    parser.add_argument('--target', help='output file')
    parser.add_argument(
        '--directory',
        default=None,
        help='input file directory')
    parser.add_argument(
        '--root',
        default=None,
        help='root directory containing the sources')
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='print the name of each file as it is read')
    parser.add_argument('sources', nargs='*', help='input files')
    options = parser.parse_args()
    is_verbose = options.verbose
    sources = options.sources

    if options.root is not None:
        os.chdir(options.root)

    if options.directory is not None:
        js_files = utils.SearchFiles(options.directory, 'js')
        mjs_files = utils.SearchFiles(options.directory, 'mjs')
        sources = js_files + mjs_files + options.sources

    source_files = functools.reduce(SourceFileByExt, sources, {})

    # Validate explicitly instead of with `assert`, which is skipped when
    # Python runs with -O.
    # Should have exactly 3 types: `.js`, `.mjs` and `.gypi`
    if set(source_files) != {'.js', '.mjs', '.gypi'}:
        parser.error(
            'expected exactly `.js`, `.mjs` and `.gypi` sources, got: %s'
            % (', '.join(sorted(source_files)) or 'nothing'))
    # Currently config.gypi is the only `.gypi` file allowed
    gypi_files = source_files['.gypi']
    if (len(gypi_files) != 1 or
            os.path.basename(gypi_files[0]) != 'config.gypi'):
        parser.error(
            'config.gypi must be the only `.gypi` source, got: %s'
            % ', '.join(gypi_files))
    source_files['config.gypi'] = source_files.pop('.gypi')[0]
    JS2C(source_files, options.target)


if __name__ == "__main__":
    main()