#!/usr/bin/env python

#
# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# Emits a C++ file to be compiled and linked into libv8 to support postmortem
# debugging tools.
# Most importantly, this tool emits constants describing V8
# internals:
#
#    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
#    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
#    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
#    v8dbg_frametype_NAME = VALUE               Describes stack frame values
#    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
#    v8dbg_prop_NAME = OFFSET                   Object property offsets
#    v8dbg_NAME = VALUE                         Miscellaneous values
#
# These constants are declared as global integers so that they'll be present in
# the generated libv8 binary.
#

import re
import sys

#
# Miscellaneous constants, tags, and masks used for object identification.
# Each entry is emitted as "int v8dbg_<name> = <value>;".
#
consts_misc = [
    {'name': 'FirstNonstringType', 'value': 'FIRST_NONSTRING_TYPE'},

    {'name': 'IsNotStringMask', 'value': 'kIsNotStringMask'},
    {'name': 'StringTag', 'value': 'kStringTag'},
    {'name': 'NotStringTag', 'value': 'kNotStringTag'},

    {'name': 'StringEncodingMask', 'value': 'kStringEncodingMask'},
    {'name': 'TwoByteStringTag', 'value': 'kTwoByteStringTag'},
    {'name': 'OneByteStringTag', 'value': 'kOneByteStringTag'},

    {'name': 'StringRepresentationMask',
     'value': 'kStringRepresentationMask'},
    {'name': 'SeqStringTag', 'value': 'kSeqStringTag'},
    {'name': 'ConsStringTag', 'value': 'kConsStringTag'},
    {'name': 'ExternalStringTag', 'value': 'kExternalStringTag'},
    {'name': 'SlicedStringTag', 'value': 'kSlicedStringTag'},

    {'name': 'HeapObjectTag', 'value': 'kHeapObjectTag'},
    {'name': 'HeapObjectTagMask', 'value': 'kHeapObjectTagMask'},
    {'name': 'SmiTag', 'value': 'kSmiTag'},
    {'name': 'SmiTagMask', 'value': 'kSmiTagMask'},
    {'name': 'SmiValueShift', 'value': 'kSmiTagSize'},
    {'name': 'SmiShiftSize', 'value': 'kSmiShiftSize'},
    {'name': 'PointerSizeLog2', 'value': 'kPointerSizeLog2'},

    {'name': 'OddballFalse', 'value': 'Oddball::kFalse'},
    {'name': 'OddballTrue', 'value': 'Oddball::kTrue'},
    {'name': 'OddballTheHole', 'value': 'Oddball::kTheHole'},
    {'name': 'OddballNull', 'value': 'Oddball::kNull'},
    {'name': 'OddballArgumentMarker', 'value': 'Oddball::kArgumentMarker'},
    {'name': 'OddballUndefined', 'value': 'Oddball::kUndefined'},
    {'name': 'OddballUninitialized', 'value': 'Oddball::kUninitialized'},
    {'name': 'OddballOther', 'value': 'Oddball::kOther'},
    {'name': 'OddballException', 'value': 'Oddball::kException'},

    {'name': 'prop_idx_first',
     'value': 'DescriptorArray::kFirstIndex'},
    {'name': 'prop_type_field',
     'value': 'DATA'},
    {'name': 'prop_type_mask',
     'value': 'PropertyDetails::TypeField::kMask'},
    {'name': 'prop_index_mask',
     'value': 'PropertyDetails::FieldIndexField::kMask'},
    {'name': 'prop_index_shift',
     'value': 'PropertyDetails::FieldIndexField::kShift'},
    {'name': 'prop_representation_mask',
     'value': 'PropertyDetails::RepresentationField::kMask'},
    {'name': 'prop_representation_shift',
     'value': 'PropertyDetails::RepresentationField::kShift'},
    {'name': 'prop_representation_integer8',
     'value': 'Representation::Kind::kInteger8'},
    {'name': 'prop_representation_uinteger8',
     'value': 'Representation::Kind::kUInteger8'},
    {'name': 'prop_representation_integer16',
     'value': 'Representation::Kind::kInteger16'},
    {'name': 'prop_representation_uinteger16',
     'value': 'Representation::Kind::kUInteger16'},
    {'name': 'prop_representation_smi',
     'value': 'Representation::Kind::kSmi'},
    {'name': 'prop_representation_integer32',
     'value': 'Representation::Kind::kInteger32'},
    {'name': 'prop_representation_double',
     'value': 'Representation::Kind::kDouble'},
    {'name': 'prop_representation_heapobject',
     'value': 'Representation::Kind::kHeapObject'},
    {'name': 'prop_representation_tagged',
     'value': 'Representation::Kind::kTagged'},
    {'name': 'prop_representation_external',
     'value': 'Representation::Kind::kExternal'},

    {'name': 'prop_desc_key',
     'value': 'DescriptorArray::kDescriptorKey'},
    {'name': 'prop_desc_details',
     'value': 'DescriptorArray::kDescriptorDetails'},
    {'name': 'prop_desc_value',
     'value': 'DescriptorArray::kDescriptorValue'},
    {'name': 'prop_desc_size',
     'value': 'DescriptorArray::kDescriptorSize'},

    {'name': 'elements_fast_holey_elements',
     'value': 'FAST_HOLEY_ELEMENTS'},
    {'name': 'elements_fast_elements',
     'value': 'FAST_ELEMENTS'},
    {'name': 'elements_dictionary_elements',
     'value': 'DICTIONARY_ELEMENTS'},

    {'name': 'bit_field2_elements_kind_mask',
     'value': 'Map::ElementsKindBits::kMask'},
    {'name': 'bit_field2_elements_kind_shift',
     'value': 'Map::ElementsKindBits::kShift'},
    {'name': 'bit_field3_dictionary_map_shift',
     'value': 'Map::DictionaryMap::kShift'},
    {'name': 'bit_field3_number_of_own_descriptors_mask',
     'value': 'Map::NumberOfOwnDescriptorsBits::kMask'},
    {'name': 'bit_field3_number_of_own_descriptors_shift',
     'value': 'Map::NumberOfOwnDescriptorsBits::kShift'},

    {'name': 'off_fp_context',
     'value': 'StandardFrameConstants::kContextOffset'},
    {'name': 'off_fp_constant_pool',
     'value': 'StandardFrameConstants::kConstantPoolOffset'},
    {'name': 'off_fp_marker',
     'value': 'StandardFrameConstants::kMarkerOffset'},
    {'name': 'off_fp_function',
     'value': 'JavaScriptFrameConstants::kFunctionOffset'},
    {'name': 'off_fp_args',
     'value': 'JavaScriptFrameConstants::kLastParameterOffset'},

    {'name': 'scopeinfo_idx_nparams',
     'value': 'ScopeInfo::kParameterCount'},
    {'name': 'scopeinfo_idx_nstacklocals',
     'value': 'ScopeInfo::kStackLocalCount'},
    {'name': 'scopeinfo_idx_ncontextlocals',
     'value': 'ScopeInfo::kContextLocalCount'},
    {'name': 'scopeinfo_idx_ncontextglobals',
     'value': 'ScopeInfo::kContextGlobalCount'},
    {'name': 'scopeinfo_idx_first_vars',
     'value': 'ScopeInfo::kVariablePartIndex'},

    {'name': 'sharedfunctioninfo_start_position_mask',
     'value': 'SharedFunctionInfo::kStartPositionMask'},
    {'name': 'sharedfunctioninfo_start_position_shift',
     'value': 'SharedFunctionInfo::kStartPositionShift'},

    {'name': 'jsarray_buffer_was_neutered_mask',
     'value': 'JSArrayBuffer::WasNeutered::kMask'},
    {'name': 'jsarray_buffer_was_neutered_shift',
     'value': 'JSArrayBuffer::WasNeutered::kShift'},
]

#
# The following useful fields are missing accessors, so we define fake ones.
# Each string is the argument list of an ACCESSORS(...) macro call:
# "Class, field, type, offset-constant".
#
extras_accessors = [
    'JSFunction, context, Context, kContextOffset',
    'Context, closure_index, int, CLOSURE_INDEX',
    'Context, native_context_index, int, NATIVE_CONTEXT_INDEX',
    'Context, previous_index, int, PREVIOUS_INDEX',
    'Context, min_context_slots, int, MIN_CONTEXT_SLOTS',
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',
    'JSArrayBuffer, backing_store, Object, kBackingStoreOffset',
    'JSArrayBufferView, byte_offset, Object, kByteOffsetOffset',
    'JSTypedArray, length, Object, kLengthOffset',
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, inobject_properties_or_constructor_function_index, int, kInObjectPropertiesOrConstructorFunctionIndexOffset',
    'Map, instance_size, int, kInstanceSizeOffset',
    'Map, bit_field, char, kBitFieldOffset',
    'Map, bit_field2, char, kBitField2Offset',
    'Map, bit_field3, int, kBitField3Offset',
    'Map, prototype, Object, kPrototypeOffset',
    'NameDictionaryShape, prefix_size, int, kPrefixSize',
    'NameDictionaryShape, entry_size, int, kEntrySize',
    'NameDictionary, prefix_start_index, int, kPrefixStartIndex',
    'SeededNumberDictionaryShape, prefix_size, int, kPrefixSize',
    'UnseededNumberDictionaryShape, prefix_size, int, kPrefixSize',
    'NumberDictionaryShape, entry_size, int, kEntrySize',
    'Oddball, kind_offset, int, kKindOffset',
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqOneByteString, chars, char, kHeaderSize',
    'SeqTwoByteString, chars, char, kHeaderSize',
    'SharedFunctionInfo, code, Code, kCodeOffset',
    'SharedFunctionInfo, scope_info, ScopeInfo, kScopeInfoOffset',
    'SlicedString, parent, String, kParentOffset',
    'Code, instruction_start, uintptr_t, kHeaderSize',
    'Code, instruction_size, int, kInstructionSizeOffset',
]

#
# The following is a whitelist of classes we expect to find when scanning the
# source code. This list is not exhaustive, but it's still useful to identify
# when this script gets out of sync with the source. See load_objects().
#
expected_classes = [
    'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    'SeqOneByteString', 'SharedFunctionInfo'
]


#
# The following structures store high-level representations of the structures
# for which we're going to emit descriptive constants.
#
types = {}              # set of all type names
typeclasses = {}        # maps type names to corresponding class names
klasses = {}            # known classes, including parents
fields = []             # field declarations

#
# Prologue of the generated C++ file. Note that this is not a raw string, so
# the backslash-newline inside the FRAME_CONST macro is consumed by Python and
# the macro is emitted on a single line.
#
header = '''
/*
 * This file is generated by %s.  Do not edit directly.
 */

#include "src/v8.h"
#include "src/frames.h"
#include "src/frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass)       \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0]

#
# Epilogue of the generated C++ file: closes the extern "C" block.
#
footer = '''
}
'''


#
# Get the base class
#
def get_base_class(klass):
    """Walk the recorded parent chain of klass up to its root.

    Returns 'Object' when the chain recorded in the global `klasses` table
    reaches a class named 'Object', and None when the chain hits a class that
    is not in `klasses` (including a class with no recorded parent).
    """
    if klass == 'Object':
        return klass

    if klass not in klasses:
        return None

    return get_base_class(klasses[klass]['parent'])


#
# Infer the class name for a type based on a systematic transformation.
# For example, "JS_FUNCTION_TYPE" becomes "JSFunction".
#
def _class_name_for_type(typename):
    """Return the candidate class name for an InstanceType enumerator.

    The returned name is only a guess; the caller must still check whether it
    is present in `klasses`. Returns None when typename lacks the "_TYPE"
    suffix.
    """
    #
    # Symbols and Strings are implemented using the same classes.
    #
    usetype = re.sub('SYMBOL_', 'STRING_', typename)

    #
    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    #
    usetype = re.sub('_REGEXP_', '_REG_EXP_', usetype)

    #
    # Remove the "_TYPE" suffix and then convert to camel case,
    # except that a "JS" prefix remains uppercase (as in
    # "JS_FUNCTION_TYPE" => "JSFunction").
    #
    if not usetype.endswith('_TYPE'):
        return None

    parts = usetype[:-len('_TYPE')].split('_')

    if parts[0] == 'JS':
        cctype = 'JS'
        parts = parts[1:]
    else:
        cctype = ''

    # capitalize() tolerates empty components (e.g. from "A__B_TYPE"), where
    # indexing part[0] would raise IndexError.
    cctype += ''.join(part.capitalize() for part in parts)

    #
    # Mapping string types is more complicated. Both types and
    # class names for Strings specify a representation (e.g., Seq,
    # Cons, External, or Sliced) and an encoding (TwoByte/OneByte),
    # In the simplest case, both of these are explicit in both
    # names, as in:
    #
    #       EXTERNAL_ONE_BYTE_STRING_TYPE => ExternalOneByteString
    #
    # However, either the representation or encoding can be omitted
    # from the type name, in which case "Seq" and "TwoByte" are
    # assumed, as in:
    #
    #       STRING_TYPE => SeqTwoByteString
    #
    # Additionally, sometimes the type name has more information
    # than the class, as in:
    #
    #       CONS_ONE_BYTE_STRING_TYPE => ConsString
    #
    # To figure this out dynamically, we first check for a
    # representation and encoding and add them if they're not
    # present. If that doesn't yield a valid class name, then we
    # strip out the representation.
    #
    if cctype.endswith('String'):
        if ('Cons' not in cctype and
                'External' not in cctype and
                'Sliced' not in cctype):
            if 'OneByte' in cctype:
                cctype = re.sub('OneByteString$', 'SeqOneByteString', cctype)
            else:
                cctype = re.sub('String$', 'SeqString', cctype)

        if 'OneByte' not in cctype:
            cctype = re.sub('String$', 'TwoByteString', cctype)

        if cctype not in klasses:
            cctype = re.sub('OneByte', '', cctype)
            cctype = re.sub('TwoByte', '', cctype)

    return cctype


#
# Loads class hierarchy and type information from "objects.h".
#
def load_objects():
    """Populate `types`, `klasses` and `typeclasses` from sys.argv[2].

    Prints an error for every entry of `expected_classes` that no instance
    type maps to and then exits with status 1.
    """
    objfilename = sys.argv[2]
    in_insttype = False
    typestr = ''

    #
    # Construct a dictionary for the classes we're sure should be present.
    #
    checktypes = dict.fromkeys(expected_classes, True)

    class_re = re.compile(r'class (\w[^:]*)(: public (\w[^{]*))?\s*{\s*')

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate a string representing the entire InstanceType
    # enum definition and parse it later because it's easier to do so
    # without the embedded newlines.
    #
    with open(objfilename, 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            line = re.sub('//.*', '', line.strip())

            if in_insttype:
                typestr += line
                continue

            match = class_re.match(line)
            if match:
                klass = match.group(1).strip()
                pklass = match.group(3)
                if pklass:
                    pklass = pklass.strip()
                klasses[klass] = {'parent': pklass}

    #
    # Process the instance type declaration.
    #
    for entry in typestr.split(','):
        types[re.sub(r'\s*=.*', '', entry).lstrip()] = True

    #
    # We find the class for each type rather than the other way around
    # because there are fewer cases where one type maps to more than one
    # class than the other way around.
    #
    for typename in types:
        cctype = _class_name_for_type(typename)

        #
        # Despite all that, some types have no corresponding class.
        #
        if cctype in klasses:
            typeclasses[typename] = cctype
            checktypes.pop(cctype, None)

    if checktypes:
        for klass in checktypes:
            print('error: expected class "%s" not found' % klass)

        sys.exit(1)


#
# For a given macro call, pick apart the arguments and return an object
# describing the corresponding output constant. See load_fields().
#
def parse_field(call):
    """Turn one accessor-macro invocation into an output constant.

    `call` is the full text of the invocation, e.g.
    "ACCESSORS(Map, prototype, Object, kPrototypeOffset)"; it may span
    several lines. Returns a dict with:

      'name':  'class_<klass>__<field>__<type>' ('SMI' for the SMI macros)
      'value': '<klass>::<offset>'

    Raises AssertionError for a macro kind other than ACCESSORS,
    ACCESSORS_GCSAFE, SMI_ACCESSORS or ACCESSORS_TO_SMI.
    """
    # Replace newlines with spaces. Strings are immutable, so this has to
    # build a new string rather than assign to individual characters.
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[0:idx]

    # Drop the enclosing parentheses and any padding just inside them so the
    # first and last arguments come out clean.
    rest = call[idx + 1:len(call) - 1].strip()
    args = re.split(r'\s*,\s*', rest)

    if kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE':
        klass = args[0]
        field = args[1]
        dtype = args[2]
        offset = args[3]

        return {
            'name': 'class_%s__%s__%s' % (klass, field, dtype),
            'value': '%s::%s' % (klass, offset)
        }

    assert kind == 'SMI_ACCESSORS' or kind == 'ACCESSORS_TO_SMI', \
        'unexpected accessor macro: %r' % kind
    klass = args[0]
    field = args[1]
    offset = args[2]

    return {
        'name': 'class_%s__%s__%s' % (klass, field, 'SMI'),
        'value': '%s::%s' % (klass, offset)
    }


#
# Scan part of a line while tracking parenthesis nesting depth.
#
def _scan_parens(line, start, opens):
    """Scan line[start:] and return (index, opens).

    `opens` is the nesting depth on entry. Scanning stops at the first
    character after which the depth is zero; `index` is that character's
    position, or the last position in the line if the depth never reaches
    zero. For an empty range `index` is `start`.
    """
    ii = start
    for ii in range(start, len(line)):
        if line[ii] == '(':
            opens += 1
        elif line[ii] == ')':
            opens -= 1

        if opens == 0:
            break

    return ii, opens


#
# Load field offset information from objects-inl.h.
#
def load_fields():
    """Append a constant to `fields` for every accessor macro in sys.argv[3].

    The constants synthesized from `extras_accessors` are appended afterwards.
    """
    inlfilename = sys.argv[3]

    #
    # Each class's fields and the corresponding offsets are described in the
    # source by calls to macros like "ACCESSORS" (and friends). All we do
    # here is extract these macro invocations, taking into account that they
    # may span multiple lines and may contain nested parentheses. We also
    # call parse_field() to pick apart the invocation.
    #
    prefixes = ['ACCESSORS', 'ACCESSORS_GCSAFE',
                'SMI_ACCESSORS', 'ACCESSORS_TO_SMI']
    current = ''
    opens = 0

    with open(inlfilename, 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line
                end, opens = _scan_parens(line, 0, opens)
                current += line[0:end + 1]
                continue

            for prefix in prefixes:
                if not line.startswith(prefix + '('):
                    continue

                # Flush the previous, now complete, invocation.
                if current:
                    fields.append(parse_field(current))
                    current = ''

                end, opens = _scan_parens(line, len(prefix), opens)
                current += line[0:end + 1]

                # "<prefix>(" can match at most one of the prefixes.
                break

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))


#
# Emit a block of constants.
#
def emit_set(out, consts):
    """Write "int v8dbg_<name> = <value>;" to `out` for each constant.

    `out` is any object with a write() method; `consts` is an iterable of
    dicts with 'name' and 'value' keys. A blank line follows the block.
    """
    # Fix up overzealous parses. This could be done inside the
    # parsers but as there are several, it's easiest to do it here.
    ws = re.compile(r'\s+')
    for const in consts:
        name = ws.sub('', const['name'])
        value = ws.sub('', str(const['value']))  # Can be a number.
        out.write('int v8dbg_%s = %s;\n' % (name, value))
    out.write('\n')


#
# Emit the whole output file.
#
def emit_config():
    """Write the generated C++ source to the path given in sys.argv[1].

    The output consists of `header`, the miscellaneous constants, the class
    type constants (one per entry of `typeclasses`), the class hierarchy
    constants (one per class in `klasses` that has a parent and whose parent
    chain reaches 'Object'), the field constants, and `footer`.
    """
    # open() rather than the Python 2-only file() builtin; the with-statement
    # makes sure the output is flushed and closed even if emission fails.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        consts = []
        # sorted() works on both Python 2 lists and Python 3 key views.
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            bklass = get_base_class(klassname)
            if bklass != 'Object':
                continue
            if pklass is None:
                continue

            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)


# Only run the generator when executed as a script, so that importing this
# module has no side effects.
if __name__ == '__main__':
    if len(sys.argv) < 4:
        print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
        sys.exit(2)

    load_objects()
    load_fields()
    emit_config()