• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# Copyright 2018 the V8 project authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import os
7import sys
8import subprocess
9import re
10import math
11
12INPUT_PATH = "src/parsing/keywords.txt"
13OUTPUT_PATH = "src/parsing/keywords-gen.h"
14
15# TODO(leszeks): Trimming seems to regress performance, investigate.
16TRIM_CHAR_TABLE = False
17
18
19def next_power_of_2(x):
20  return 1 if x == 0 else 2**int(math.ceil(math.log(x, 2)))
21
22
23def call_with_input(cmd, input_string=""):
24  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
25  stdout, _ = p.communicate(input_string)
26  retcode = p.wait()
27  if retcode != 0:
28    raise subprocess.CalledProcessError(retcode, cmd)
29  return stdout
30
31
32def checked_sub(pattern, sub, out, count=1, flags=0):
33  out, n = re.subn(pattern, sub, out, flags=flags)
34  if n != count:
35    raise Exception("Didn't get exactly %d replacement(s) for pattern: %s" %
36                    (count, pattern))
37  return out
38
39
40def change_sizet_to_int(out):
41  # Literal buffer lengths are given as ints, not size_t
42  return checked_sub(r'\bsize_t\b', 'int', out, count=4)
43
44
45def drop_line_directives(out):
46  # #line causes gcov issue, so drop it
47  return re.sub(r'^#\s*line .*$\n', '', out, flags=re.MULTILINE)
48
49
50def trim_and_dcheck_char_table(out):
51  # Potential keyword strings are known to be lowercase ascii, so chop off the
52  # rest of the table and mask out the char
53
54  reads_re = re.compile(
55      r'asso_values\[static_cast<unsigned char>\(str\[(\d+)\]\)\]')
56
57  dchecks = []
58  for str_read in reads_re.finditer(out):
59    dchecks.append("DCHECK_LT(str[%d], 128);" % int(str_read.group(1)))
60
61  if TRIM_CHAR_TABLE:
62    out = checked_sub(
63        r'static const unsigned char asso_values\[\]\s*=\s*\{(\s*\d+\s*,){96}',
64        "".join(dchecks) + r'static const unsigned char asso_values[32] = {',
65        out,
66        flags=re.MULTILINE)
67    out = checked_sub(
68        reads_re.pattern,
69        r'asso_values[static_cast<unsigned char>(str[(\1)]&31)]',
70        out,
71        count=len(dchecks),
72        flags=re.MULTILINE)
73  else:
74    out = checked_sub(
75        r'static const unsigned char asso_values\[\]\s*=\s*\{',
76        "".join(dchecks) + r'static const unsigned char asso_values[128] = {',
77        out,
78        flags=re.MULTILINE)
79
80  return out
81
82
83def use_isinrange(out):
84  # Our IsInRange method is more efficient than checking for min/max length
85  return checked_sub(r'if \(len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH\)',
86                     r'if (base::IsInRange(len, MIN_WORD_LENGTH, '
87                     + r'MAX_WORD_LENGTH))',
88                     out)
89
90
91def pad_tables(out):
92  # We don't want to compare against the max hash value, so pad the tables up
93  # to a power of two and mask the hash.
94
95  # First get the new size
96  max_hash_value = int(re.search(r'MAX_HASH_VALUE\s*=\s*(\d+)', out).group(1))
97  old_table_length = max_hash_value + 1
98  new_table_length = next_power_of_2(old_table_length)
99  table_padding_len = new_table_length - old_table_length
100
101  # Pad the length table.
102  single_lengthtable_entry = r'\d+'
103  out = checked_sub(
104      r"""
105      static\ const\ unsigned\ char\ kPerfectKeywordLengthTable\[\]\s*=\s*\{
106        (
107          \s*%(single_lengthtable_entry)s\s*
108          (?:,\s*%(single_lengthtable_entry)s\s*)*
109        )
110      \}
111    """ % {'single_lengthtable_entry': single_lengthtable_entry},
112      r'static const unsigned char kPerfectKeywordLengthTable[%d] = { \1 %s }'
113      % (new_table_length, "".join([',0'] * table_padding_len)),
114      out,
115      flags=re.MULTILINE | re.VERBOSE)
116
117  # Pad the word list.
118  single_wordlist_entry = r"""
119      (?:\#line\ \d+\ ".*"$\s*)?
120      \{\s*"[a-z]*"\s*,\s*Token::[A-Z_]+\}
121    """
122  out = checked_sub(
123      r"""
124      static\ const\ struct\ PerfectKeywordHashTableEntry\ kPerfectKeywordHashTable\[\]\s*=\s*\{
125        (
126          \s*%(single_wordlist_entry)s\s*
127          (?:,\s*%(single_wordlist_entry)s\s*)*
128        )
129      \}
130    """ % {'single_wordlist_entry': single_wordlist_entry},
131      r'static const struct PerfectKeywordHashTableEntry kPerfectKeywordHashTable[%d] = {\1 %s }'
132      % (new_table_length, "".join(
133          [',{"",Token::IDENTIFIER}'] * table_padding_len)),
134      out,
135      flags=re.MULTILINE | re.VERBOSE)
136
137  # Mask the hash and replace the range check with DCHECKs.
138  out = checked_sub(r'Hash\s*\(\s*str,\s*len\s*\)',
139                    r'Hash(str, len)&0x%x' % (new_table_length - 1), out)
140  out = checked_sub(
141      r'if \(key <= MAX_HASH_VALUE\)',
142      r'DCHECK_LT(key, arraysize(kPerfectKeywordLengthTable));DCHECK_LT(key, arraysize(kPerfectKeywordHashTable));',
143      out)
144
145  return out
146
147
148def return_token(out):
149  # We want to return the actual token rather than the table entry.
150
151  # Change the return type of the function. Make it inline too.
152  out = checked_sub(
153      r'const\s*struct\s*PerfectKeywordHashTableEntry\s*\*\s*((?:PerfectKeywordHash::)?GetToken)',
154      r'inline Token::Value \1',
155      out,
156      count=2)
157
158  # Change the return value when the keyword is found
159  out = checked_sub(r'return &kPerfectKeywordHashTable\[key\];',
160                    r'return kPerfectKeywordHashTable[key].value;', out)
161
162  # Change the return value when the keyword is not found
163  out = checked_sub(r'return 0;', r'return Token::IDENTIFIER;', out)
164
165  return out
166
167
168def memcmp_to_while(out):
169  # It's faster to loop over the keyword with a while loop than calling memcmp.
170  # Careful, this replacement is quite flaky, because otherwise the regex is
171  # unreadable.
172  return checked_sub(
173      re.escape("if (*str == *s && !memcmp (str + 1, s + 1, len - 1))") + r"\s*"
174      + re.escape("return kPerfectKeywordHashTable[key].value;"),
175      """
176      while(*s!=0) {
177        if (*s++ != *str++) return Token::IDENTIFIER;
178      }
179      return kPerfectKeywordHashTable[key].value;
180      """,
181      out,
182      flags=re.MULTILINE)
183
184
185def wrap_namespace(out):
186  return """// Copyright 2018 the V8 project authors. All rights reserved.
187// Use of this source code is governed by a BSD-style license that can be
188// found in the LICENSE file.
189
190// This file is automatically generated by gen-keywords-gen-h.py and should not
191// be modified manually.
192
193#ifndef V8_PARSING_KEYWORDS_GEN_H_
194#define V8_PARSING_KEYWORDS_GEN_H_
195
196#include "src/parsing/token.h"
197
198namespace v8 {
199namespace internal {
200
201%s
202
203}  // namespace internal
204}  // namespace v8
205
206#endif  // V8_PARSING_KEYWORDS_GEN_H_
207""" % (out)
208
209
210def trim_character_set_warning(out):
211  # gperf generates an error message that is too large, trim it
212
213  return out.replace(
214      '"gperf generated tables don\'t work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."',
215      '"gperf generated tables don\'t work with this execution character set."\\\n// If you see this error, please report a bug to <bug-gperf@gnu.org>.'
216  )
217
218
219def main():
220  try:
221    script_dir = os.path.dirname(sys.argv[0])
222    root_dir = os.path.join(script_dir, '..')
223
224    out = subprocess.check_output(["gperf", "-m100", INPUT_PATH], cwd=root_dir)
225
226    # And now some munging of the generated file.
227    out = change_sizet_to_int(out)
228    out = drop_line_directives(out)
229    out = trim_and_dcheck_char_table(out)
230    out = use_isinrange(out)
231    out = pad_tables(out)
232    out = return_token(out)
233    out = memcmp_to_while(out)
234    out = wrap_namespace(out)
235    out = trim_character_set_warning(out)
236
237    # Final formatting.
238    clang_format_path = os.path.join(root_dir,
239                                     'third_party/depot_tools/clang-format')
240    out = call_with_input([clang_format_path], out)
241
242    with open(os.path.join(root_dir, OUTPUT_PATH), 'w') as f:
243      f.write(out)
244
245    return 0
246
247  except subprocess.CalledProcessError as e:
248    sys.stderr.write("Error calling '{}'\n".format(" ".join(e.cmd)))
249    return e.returncode
250
251
252if __name__ == '__main__':
253  sys.exit(main())
254