#!/usr/bin/env python
2
3#
4# Copyright 2012 the V8 project authors. All rights reserved.
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10#       notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12#       copyright notice, this list of conditions and the following
13#       disclaimer in the documentation and/or other materials provided
14#       with the distribution.
15#     * Neither the name of Google Inc. nor the names of its
16#       contributors may be used to endorse or promote products derived
17#       from this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30#
31
#
# Emits a C++ file to be compiled and linked into libv8 to support postmortem
# debugging tools.  Most importantly, this tool emits constants describing V8
# internals:
#
#    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
#    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
#    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
#    v8dbg_frametype_NAME = VALUE               Describes stack frame values
#    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
#    v8dbg_prop_NAME = OFFSET                   Object property offsets
#    v8dbg_NAME = VALUE                         Miscellaneous values
#
# These constants are declared as global integers so that they'll be present in
# the generated libv8 binary.
#
48
import re
import sys
51
#
# Miscellaneous constants, tags, and masks used for object identification.
# Each entry pairs the suffix of the emitted "v8dbg_<name>" symbol with the
# C++ expression that supplies its value.
#
consts_misc = [
    { 'name': 'FirstNonstringType',     'value': 'FIRST_NONSTRING_TYPE' },

    { 'name': 'IsNotStringMask',        'value': 'kIsNotStringMask' },
    { 'name': 'StringTag',              'value': 'kStringTag' },
    { 'name': 'NotStringTag',           'value': 'kNotStringTag' },

    { 'name': 'StringEncodingMask',     'value': 'kStringEncodingMask' },
    { 'name': 'TwoByteStringTag',       'value': 'kTwoByteStringTag' },
    { 'name': 'OneByteStringTag',       'value': 'kOneByteStringTag' },

    { 'name': 'StringRepresentationMask',
        'value': 'kStringRepresentationMask' },
    { 'name': 'SeqStringTag',           'value': 'kSeqStringTag' },
    { 'name': 'ConsStringTag',          'value': 'kConsStringTag' },
    { 'name': 'ExternalStringTag',      'value': 'kExternalStringTag' },
    { 'name': 'SlicedStringTag',        'value': 'kSlicedStringTag' },

    { 'name': 'HeapObjectTag',          'value': 'kHeapObjectTag' },
    { 'name': 'HeapObjectTagMask',      'value': 'kHeapObjectTagMask' },
    { 'name': 'SmiTag',                 'value': 'kSmiTag' },
    { 'name': 'SmiTagMask',             'value': 'kSmiTagMask' },
    { 'name': 'SmiValueShift',          'value': 'kSmiTagSize' },
    { 'name': 'SmiShiftSize',           'value': 'kSmiShiftSize' },
    { 'name': 'PointerSizeLog2',        'value': 'kPointerSizeLog2' },

    { 'name': 'OddballFalse',           'value': 'Oddball::kFalse' },
    { 'name': 'OddballTrue',            'value': 'Oddball::kTrue' },
    { 'name': 'OddballTheHole',         'value': 'Oddball::kTheHole' },
    { 'name': 'OddballNull',            'value': 'Oddball::kNull' },
    { 'name': 'OddballArgumentMarker',  'value': 'Oddball::kArgumentMarker' },
    { 'name': 'OddballUndefined',       'value': 'Oddball::kUndefined' },
    { 'name': 'OddballUninitialized',   'value': 'Oddball::kUninitialized' },
    { 'name': 'OddballOther',           'value': 'Oddball::kOther' },
    { 'name': 'OddballException',       'value': 'Oddball::kException' },

    { 'name': 'prop_idx_first',
        'value': 'DescriptorArray::kFirstIndex' },
    { 'name': 'prop_type_field',
        'value': 'DATA' },
    { 'name': 'prop_type_mask',
        'value': 'PropertyDetails::TypeField::kMask' },
    { 'name': 'prop_index_mask',
        'value': 'PropertyDetails::FieldIndexField::kMask' },
    { 'name': 'prop_index_shift',
        'value': 'PropertyDetails::FieldIndexField::kShift' },
    { 'name': 'prop_representation_mask',
        'value': 'PropertyDetails::RepresentationField::kMask' },
    { 'name': 'prop_representation_shift',
        'value': 'PropertyDetails::RepresentationField::kShift' },
    { 'name': 'prop_representation_integer8',
        'value': 'Representation::Kind::kInteger8' },
    { 'name': 'prop_representation_uinteger8',
        'value': 'Representation::Kind::kUInteger8' },
    { 'name': 'prop_representation_integer16',
        'value': 'Representation::Kind::kInteger16' },
    { 'name': 'prop_representation_uinteger16',
        'value': 'Representation::Kind::kUInteger16' },
    { 'name': 'prop_representation_smi',
        'value': 'Representation::Kind::kSmi' },
    { 'name': 'prop_representation_integer32',
        'value': 'Representation::Kind::kInteger32' },
    { 'name': 'prop_representation_double',
        'value': 'Representation::Kind::kDouble' },
    { 'name': 'prop_representation_heapobject',
        'value': 'Representation::Kind::kHeapObject' },
    { 'name': 'prop_representation_tagged',
        'value': 'Representation::Kind::kTagged' },
    { 'name': 'prop_representation_external',
        'value': 'Representation::Kind::kExternal' },

    { 'name': 'prop_desc_key',
        'value': 'DescriptorArray::kDescriptorKey' },
    { 'name': 'prop_desc_details',
        'value': 'DescriptorArray::kDescriptorDetails' },
    { 'name': 'prop_desc_value',
        'value': 'DescriptorArray::kDescriptorValue' },
    { 'name': 'prop_desc_size',
        'value': 'DescriptorArray::kDescriptorSize' },

    { 'name': 'elements_fast_holey_elements',
        'value': 'FAST_HOLEY_ELEMENTS' },
    { 'name': 'elements_fast_elements',
        'value': 'FAST_ELEMENTS' },
    { 'name': 'elements_dictionary_elements',
        'value': 'DICTIONARY_ELEMENTS' },

    { 'name': 'bit_field2_elements_kind_mask',
        'value': 'Map::ElementsKindBits::kMask' },
    { 'name': 'bit_field2_elements_kind_shift',
        'value': 'Map::ElementsKindBits::kShift' },
    { 'name': 'bit_field3_dictionary_map_shift',
        'value': 'Map::DictionaryMap::kShift' },
    { 'name': 'bit_field3_number_of_own_descriptors_mask',
        'value': 'Map::NumberOfOwnDescriptorsBits::kMask' },
    { 'name': 'bit_field3_number_of_own_descriptors_shift',
        'value': 'Map::NumberOfOwnDescriptorsBits::kShift' },

    { 'name': 'off_fp_context',
        'value': 'StandardFrameConstants::kContextOffset' },
    { 'name': 'off_fp_constant_pool',
        'value': 'StandardFrameConstants::kConstantPoolOffset' },
    { 'name': 'off_fp_marker',
        'value': 'StandardFrameConstants::kMarkerOffset' },
    { 'name': 'off_fp_function',
        'value': 'JavaScriptFrameConstants::kFunctionOffset' },
    { 'name': 'off_fp_args',
        'value': 'JavaScriptFrameConstants::kLastParameterOffset' },

    { 'name': 'scopeinfo_idx_nparams',
        'value': 'ScopeInfo::kParameterCount' },
    { 'name': 'scopeinfo_idx_nstacklocals',
        'value': 'ScopeInfo::kStackLocalCount' },
    { 'name': 'scopeinfo_idx_ncontextlocals',
        'value': 'ScopeInfo::kContextLocalCount' },
    { 'name': 'scopeinfo_idx_ncontextglobals',
        'value': 'ScopeInfo::kContextGlobalCount' },
    { 'name': 'scopeinfo_idx_first_vars',
        'value': 'ScopeInfo::kVariablePartIndex' },

    { 'name': 'sharedfunctioninfo_start_position_mask',
        'value': 'SharedFunctionInfo::kStartPositionMask' },
    { 'name': 'sharedfunctioninfo_start_position_shift',
        'value': 'SharedFunctionInfo::kStartPositionShift' },

    { 'name': 'jsarray_buffer_was_neutered_mask',
        'value': 'JSArrayBuffer::WasNeutered::kMask' },
    { 'name': 'jsarray_buffer_was_neutered_shift',
        'value': 'JSArrayBuffer::WasNeutered::kShift' },
]
185
#
# The following useful fields are missing accessors, so we define fake ones.
# Each string is the argument list of an ACCESSORS() invocation
# ("Class, field, type, offset constant"); load_fields() wraps it in
# "ACCESSORS(...)" and hands it to parse_field().
#
extras_accessors = [
    'JSFunction, context, Context, kContextOffset',
    'Context, closure_index, int, CLOSURE_INDEX',
    'Context, native_context_index, int, NATIVE_CONTEXT_INDEX',
    'Context, previous_index, int, PREVIOUS_INDEX',
    'Context, min_context_slots, int, MIN_CONTEXT_SLOTS',
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',
    'JSArrayBuffer, backing_store, Object, kBackingStoreOffset',
    'JSArrayBufferView, byte_offset, Object, kByteOffsetOffset',
    'JSTypedArray, length, Object, kLengthOffset',
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, inobject_properties_or_constructor_function_index, int, kInObjectPropertiesOrConstructorFunctionIndexOffset',
    'Map, instance_size, int, kInstanceSizeOffset',
    'Map, bit_field, char, kBitFieldOffset',
    'Map, bit_field2, char, kBitField2Offset',
    'Map, bit_field3, int, kBitField3Offset',
    'Map, prototype, Object, kPrototypeOffset',
    'NameDictionaryShape, prefix_size, int, kPrefixSize',
    'NameDictionaryShape, entry_size, int, kEntrySize',
    'NameDictionary, prefix_start_index, int, kPrefixStartIndex',
    'SeededNumberDictionaryShape, prefix_size, int, kPrefixSize',
    'UnseededNumberDictionaryShape, prefix_size, int, kPrefixSize',
    'NumberDictionaryShape, entry_size, int, kEntrySize',
    'Oddball, kind_offset, int, kKindOffset',
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqOneByteString, chars, char, kHeaderSize',
    'SeqTwoByteString, chars, char, kHeaderSize',
    'SharedFunctionInfo, code, Code, kCodeOffset',
    'SharedFunctionInfo, scope_info, ScopeInfo, kScopeInfoOffset',
    'SlicedString, parent, String, kParentOffset',
    'Code, instruction_start, uintptr_t, kHeaderSize',
    'Code, instruction_size, int, kInstructionSizeOffset',
]
227
#
# The following is a whitelist of classes we expect to find when scanning the
# source code. This list is not exhaustive, but it's still useful to identify
# when this script gets out of sync with the source. See load_objects().
#
expected_classes = [
    'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    'SeqOneByteString', 'SharedFunctionInfo'
]
238
239
#
# The following structures store high-level representations of the structures
# for which we're going to emit descriptive constants.  They start out empty
# and are filled in by load_objects() and load_fields().
#
types = {}              # set of all type names
typeclasses = {}        # maps type names to corresponding class names
klasses = {}            # known classes, including parents
fields = []             # field declarations
248
# Text written at the top of the generated C++ file; "%s" is filled with the
# name this script was invoked as.  The string is not raw, so the trailing
# backslash in the FRAME_CONST definition is consumed by Python as a line
# continuation and the macro is emitted on a single line.
header = '''
/*
 * This file is generated by %s.  Do not edit directly.
 */

#include "src/v8.h"
#include "src/frames.h"
#include "src/frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass)       \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0]

# Text written at the end of the generated file; closes the extern "C" block
# opened in the header.
footer = '''
}
'''
275
#
# Get the base class: follow the 'parent' links recorded in klasses, starting
# at klass, until 'Object' is reached.  Returns 'Object' in that case, or None
# as soon as the chain hits a name that is not a known class (including a
# class recorded with no parent).
#
def get_base_class(klass):
    if klass == 'Object':
        return klass

    if klass not in klasses:
        return None

    return get_base_class(klasses[klass]['parent'])
289
#
# Infer the candidate class name for an InstanceType enumerator based on a
# systematic transformation.  For example, "JS_FUNCTION_TYPE" becomes
# "JSFunction".  Returns None when the name does not end in "_TYPE".  The
# result is only a candidate: string types consult klasses to choose between
# spellings, and the caller still has to check that the name is a known class.
#
def _class_name_for_type(typename):
    # Symbols and Strings are implemented using the same classes.
    usetype = re.sub('SYMBOL_', 'STRING_', typename)

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = re.sub('_REGEXP_', '_REG_EXP_', usetype)

    #
    # Remove the "_TYPE" suffix and then convert to camel case, except that
    # a "JS" prefix remains uppercase (as in "JS_FUNCTION_TYPE" =>
    # "JSFunction").
    #
    if not usetype.endswith('_TYPE'):
        return None

    parts = usetype[:len(usetype) - len('_TYPE')].split('_')
    if parts[0] == 'JS':
        cctype = 'JS'
        parts = parts[1:]
    else:
        cctype = ''

    # Slicing (rather than indexing) keeps an empty segment from raising.
    cctype += ''.join(part[:1].upper() + part[1:].lower() for part in parts)

    #
    # Mapping string types is more complicated.  Both types and class names
    # for Strings specify a representation (e.g., Seq, Cons, External, or
    # Sliced) and an encoding (TwoByte/OneByte).  In the simplest case, both
    # of these are explicit in both names, as in:
    #
    #       EXTERNAL_ONE_BYTE_STRING_TYPE => ExternalOneByteString
    #
    # However, either the representation or encoding can be omitted from the
    # type name, in which case "Seq" and "TwoByte" are assumed, as in:
    #
    #       STRING_TYPE => SeqTwoByteString
    #
    # Additionally, sometimes the type name has more information than the
    # class, as in:
    #
    #       CONS_ONE_BYTE_STRING_TYPE => ConsString
    #
    # To figure this out dynamically, we first check for a representation
    # and encoding and add them if they're not present.  If that doesn't
    # yield a valid class name, then we strip out the encoding.
    #
    if cctype.endswith('String'):
        if ('Cons' not in cctype and 'External' not in cctype and
                'Sliced' not in cctype):
            if 'OneByte' in cctype:
                cctype = re.sub('OneByteString$', 'SeqOneByteString', cctype)
            else:
                cctype = re.sub('String$', 'SeqString', cctype)

        if 'OneByte' not in cctype:
            cctype = re.sub('String$', 'TwoByteString', cctype)

        if cctype not in klasses:
            cctype = re.sub('OneByte', '', cctype)
            cctype = re.sub('TwoByte', '', cctype)

    return cctype

#
# Loads class hierarchy and type information from "objects.h" (the file named
# by sys.argv[2]).  Fills in the module-level klasses, types and typeclasses
# tables.  If any class listed in expected_classes ends up with no matching
# instance type, prints an error for each one and exits with status 1.
#
def load_objects():
    objfilename = sys.argv[2]
    in_insttype = False
    typestr = ''

    class_decl = re.compile(r'class (\w[^:]*)(: public (\w[^{]*))?\s*\{\s*')

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate a string representing the entire InstanceType
    # enum definition and parse it later because it's easier to do so
    # without the embedded newlines.
    #
    with open(objfilename, 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            # Drop surrounding whitespace and any trailing "//" comment.
            line = re.sub('//.*', '', line.strip())

            if in_insttype:
                typestr += line
                continue

            match = class_decl.match(line)
            if match:
                klass = match.group(1).strip()
                pklass = match.group(3)
                if pklass:
                    pklass = pklass.strip()
                klasses[klass] = { 'parent': pklass }

    #
    # Process the instance type declaration: one enumerator per comma, with
    # any "= value" initializer removed.
    #
    for entry in typestr.split(','):
        types[re.sub(r'\s*=.*', '', entry).lstrip()] = True

    #
    # We find the class for each type rather than the other way around
    # because there are fewer cases where one type maps to more than one
    # class than the other way around.  Despite all the name massaging, some
    # types have no corresponding class; those are simply skipped.
    #
    missing = set(expected_classes)
    for typename in types:
        cctype = _class_name_for_type(typename)
        if cctype is not None and cctype in klasses:
            typeclasses[typename] = cctype
            missing.discard(cctype)

    if missing:
        for klass in expected_classes:
            if klass in missing:
                print('error: expected class "%s" not found' % klass)

        sys.exit(1)
443
444
#
# For a given macro call, pick apart the arguments and return an object
# describing the corresponding output constant.  See load_fields().
#
# "call" is the full text of one invocation, e.g.
# "ACCESSORS(Map, prototype, Object, kPrototypeOffset)"; it may contain
# embedded newlines.  The result is a dict with the keys 'name'
# ("class_CLASS__FIELD__TYPE") and 'value' ("CLASS::OFFSET").  ACCESSORS and
# ACCESSORS_GCSAFE take (class, field, type, offset); SMI_ACCESSORS and
# ACCESSORS_TO_SMI take (class, field, offset) and use "SMI" as the type.
# Any other macro name raises AssertionError.
#
def parse_field(call):
    # Replace newlines with spaces.  (The original loop compared each
    # character with ' ' instead of assigning, so it never had any effect.)
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[0:idx]
    # Everything between the opening paren and the final character, which is
    # the closing paren of the invocation.
    rest = call[idx + 1:len(call) - 1]
    args = [arg.strip() for arg in rest.split(',')]

    if kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE':
        klass, field, dtype, offset = args[0], args[1], args[2], args[3]
    elif kind == 'SMI_ACCESSORS' or kind == 'ACCESSORS_TO_SMI':
        klass, field, offset = args[0], args[1], args[2]
        dtype = 'SMI'
    else:
        # Raised explicitly so the check survives "python -O".
        raise AssertionError('unrecognized accessor macro: %r' % kind)

    return {
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset)
    }
482
#
# Scan line[start:] while tracking parenthesis depth, beginning at depth
# "opens".  Returns the text of the line up to and including the character at
# which the depth reached zero (or the whole line if it never did), together
# with the resulting depth.
#
def _take_call_text(line, start, opens):
    end = start
    for end in range(start, len(line)):
        if line[end] == '(':
            opens += 1
        elif line[end] == ')':
            opens -= 1

        if opens == 0:
            break

    return line[0:end + 1], opens

#
# Load field offset information from objects-inl.h (the file named by
# sys.argv[3]) and append one parse_field() result per accessor macro
# invocation to the module-level fields list, followed by one per entry in
# extras_accessors.
#
def load_fields():
    inlfilename = sys.argv[3]

    #
    # Each class's fields and the corresponding offsets are described in the
    # source by calls to macros like "ACCESSORS" (and friends).  All we do
    # here is extract these macro invocations, taking into account that they
    # may span multiple lines and may contain nested parentheses.  We also
    # call parse_field() to pick apart the invocation.
    #
    prefixes = [ 'ACCESSORS', 'ACCESSORS_GCSAFE',
                 'SMI_ACCESSORS', 'ACCESSORS_TO_SMI' ]
    current = ''
    opens = 0

    with open(inlfilename, 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line of an invocation that is still open.
                text, opens = _take_call_text(line, 0, opens)
                current += text
                continue

            for prefix in prefixes:
                if not line.startswith(prefix + '('):
                    continue

                # A new invocation starts here; the previous one (if any) is
                # complete, so parse it now.
                if len(current) > 0:
                    fields.append(parse_field(current))
                    current = ''

                text, opens = _take_call_text(line, len(prefix), opens)
                current += text

    # Flush the last invocation seen in the file.
    if len(current) > 0:
        fields.append(parse_field(current))
        current = ''

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))
542
#
# Emit a block of constants.  "out" is any object with a write() method and
# "consts" is a sequence of dicts with 'name' and 'value' keys.  One line of
# the form "int v8dbg_NAME = VALUE;" is written per entry, followed by a
# single blank line.
#
def emit_set(out, consts):
    # Fix up overzealous parses.  This could be done inside the
    # parsers but as there are several, it's easiest to do it here.
    ws = re.compile(r'\s+')
    for const in consts:
        name = ws.sub('', const['name'])
        value = ws.sub('', str(const['value']))  # Can be a number.
        out.write('int v8dbg_%s = %s;\n' % (name, value))
    out.write('\n')
555
#
# Emit the whole output file to the path named by sys.argv[1]: the header,
# the miscellaneous constants, one "type_CLASS__TYPE" constant per entry in
# typeclasses, one "parent_CLASS__PARENT" constant per class that descends
# from Object, the field constants, and finally the footer.
#
def emit_config():
    # open() and sorted() work on both Python 2 and Python 3, unlike the
    # file() builtin and list-style dict.keys().sort() used previously.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        consts = []
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            # Only classes whose ancestry reaches Object are described, and
            # only when they actually have a parent.
            if get_base_class(klassname) != 'Object':
                continue
            if pklass is None:
                continue

            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)
603
# Script entry: requires the output path plus the two V8 headers to scan,
# then runs the three phases in order.
if len(sys.argv) < 4:
    print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
    sys.exit(2)

load_objects()
load_fields()
emit_config()
611