1import re 2import sys 3import copy 4import types 5import inspect 6import keyword 7import builtins 8import functools 9import _thread 10 11 12__all__ = ['dataclass', 13 'field', 14 'Field', 15 'FrozenInstanceError', 16 'InitVar', 17 'MISSING', 18 19 # Helper functions. 20 'fields', 21 'asdict', 22 'astuple', 23 'make_dataclass', 24 'replace', 25 'is_dataclass', 26 ] 27 28# Conditions for adding methods. The boxes indicate what action the 29# dataclass decorator takes. For all of these tables, when I talk 30# about init=, repr=, eq=, order=, unsafe_hash=, or frozen=, I'm 31# referring to the arguments to the @dataclass decorator. When 32# checking if a dunder method already exists, I mean check for an 33# entry in the class's __dict__. I never check to see if an attribute 34# is defined in a base class. 35 36# Key: 37# +=========+=========================================+ 38# + Value | Meaning | 39# +=========+=========================================+ 40# | <blank> | No action: no method is added. | 41# +---------+-----------------------------------------+ 42# | add | Generated method is added. | 43# +---------+-----------------------------------------+ 44# | raise | TypeError is raised. | 45# +---------+-----------------------------------------+ 46# | None | Attribute is set to None. | 47# +=========+=========================================+ 48 49# __init__ 50# 51# +--- init= parameter 52# | 53# v | | | 54# | no | yes | <--- class has __init__ in __dict__? 55# +=======+=======+=======+ 56# | False | | | 57# +-------+-------+-------+ 58# | True | add | | <- the default 59# +=======+=======+=======+ 60 61# __repr__ 62# 63# +--- repr= parameter 64# | 65# v | | | 66# | no | yes | <--- class has __repr__ in __dict__? 
67# +=======+=======+=======+ 68# | False | | | 69# +-------+-------+-------+ 70# | True | add | | <- the default 71# +=======+=======+=======+ 72 73 74# __setattr__ 75# __delattr__ 76# 77# +--- frozen= parameter 78# | 79# v | | | 80# | no | yes | <--- class has __setattr__ or __delattr__ in __dict__? 81# +=======+=======+=======+ 82# | False | | | <- the default 83# +-------+-------+-------+ 84# | True | add | raise | 85# +=======+=======+=======+ 86# Raise because not adding these methods would break the "frozen-ness" 87# of the class. 88 89# __eq__ 90# 91# +--- eq= parameter 92# | 93# v | | | 94# | no | yes | <--- class has __eq__ in __dict__? 95# +=======+=======+=======+ 96# | False | | | 97# +-------+-------+-------+ 98# | True | add | | <- the default 99# +=======+=======+=======+ 100 101# __lt__ 102# __le__ 103# __gt__ 104# __ge__ 105# 106# +--- order= parameter 107# | 108# v | | | 109# | no | yes | <--- class has any comparison method in __dict__? 110# +=======+=======+=======+ 111# | False | | | <- the default 112# +-------+-------+-------+ 113# | True | add | raise | 114# +=======+=======+=======+ 115# Raise because to allow this case would interfere with using 116# functools.total_ordering. 

# __hash__

#    +------------------- unsafe_hash= parameter
#    |       +----------- eq= parameter
#    |       |       +--- frozen= parameter
#    |       |       |
#    v       v       v    |        |        |
#                         |   no   |  yes   |  <--- class has explicitly defined __hash__
# +=======+=======+=======+========+========+
# | False | False | False |        |        | No __eq__, use the base class __hash__
# +-------+-------+-------+--------+--------+
# | False | False | True  |        |        | No __eq__, use the base class __hash__
# +-------+-------+-------+--------+--------+
# | False | True  | False | None   |        | <-- the default, not hashable
# +-------+-------+-------+--------+--------+
# | False | True  | True  | add    |        | Frozen, so hashable, allows override
# +-------+-------+-------+--------+--------+
# | True  | False | False | add    | raise  | Has no __eq__, but hashable
# +-------+-------+-------+--------+--------+
# | True  | False | True  | add    | raise  | Has no __eq__, but hashable
# +-------+-------+-------+--------+--------+
# | True  | True  | False | add    | raise  | Not frozen, but hashable
# +-------+-------+-------+--------+--------+
# | True  | True  | True  | add    | raise  | Frozen, so hashable
# +=======+=======+=======+========+========+
# For boxes that are blank, __hash__ is untouched and therefore
# inherited from the base class.  If the base is object, then
# id-based hashing is used.
#
# Note that a class may already have __hash__=None if it specified an
# __eq__ method in the class body (not one that was created by
# @dataclass).
#
# See _hash_action (below) for a coded version of this table.


# Raised when an attempt is made to modify a frozen class.  It derives
# from AttributeError, so handlers written for AttributeError also
# catch it.
class FrozenInstanceError(AttributeError):
    pass

# A sentinel object for default values to signal that a default
# factory will be used.  This is given a nice repr() which will appear
# in the function signature of dataclasses' constructors.
class _HAS_DEFAULT_FACTORY_CLASS:
    # The repr is what shows up as the parameter default in the
    # signature of a generated __init__.
    def __repr__(self):
        return '<factory>'


_HAS_DEFAULT_FACTORY = _HAS_DEFAULT_FACTORY_CLASS()


# A sentinel object to detect if a parameter is supplied or not.  Use
# a class to give it a better repr.
class _MISSING_TYPE:
    pass


MISSING = _MISSING_TYPE()

# Since most per-field metadata will be unused, create an empty
# read-only proxy that can be shared among all fields.
_EMPTY_METADATA = types.MappingProxyType({})


# Markers for the various kinds of fields and pseudo-fields.  Each
# marker is a distinct instance whose repr() is simply its own name.
class _FIELD_BASE:
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return self.name


_FIELD, _FIELD_CLASSVAR, _FIELD_INITVAR = (
    _FIELD_BASE(marker_name)
    for marker_name in ('_FIELD', '_FIELD_CLASSVAR', '_FIELD_INITVAR')
)

# The name of an attribute on the class where we store the Field
# objects.  Also used to check if a class is a Data Class.
_FIELDS = '__dataclass_fields__'

# The name of an attribute on the class that stores the parameters to
# @dataclass.
_PARAMS = '__dataclass_params__'

# The name of the function, that if it exists, is called at the end of
# __init__.
_POST_INIT_NAME = '__post_init__'

# String regex that string annotations for ClassVar or InitVar must match.
# Allows "identifier.identifier[" or "identifier[".
# https://bugs.python.org/issue33453 for details.
_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')


# Subscripting InitVar (InitVar[int]) hands back InitVar itself: the
# type parameter is accepted but not recorded.
class _InitVarMeta(type):
    def __getitem__(self, params):
        return self


class InitVar(metaclass=_InitVarMeta):
    pass


# Instances of Field are only ever created from within this module,
# and only from the field() function, although Field instances are
# exposed externally as (conceptually) read-only objects.
#
# name and type are filled in after the fact, not in __init__.
215# They're not known at the time this class is instantiated, but it's 216# convenient if they're available later. 217# 218# When cls._FIELDS is filled in with a list of Field objects, the name 219# and type fields will have been populated. 220class Field: 221 __slots__ = ('name', 222 'type', 223 'default', 224 'default_factory', 225 'repr', 226 'hash', 227 'init', 228 'compare', 229 'metadata', 230 '_field_type', # Private: not to be used by user code. 231 ) 232 233 def __init__(self, default, default_factory, init, repr, hash, compare, 234 metadata): 235 self.name = None 236 self.type = None 237 self.default = default 238 self.default_factory = default_factory 239 self.init = init 240 self.repr = repr 241 self.hash = hash 242 self.compare = compare 243 self.metadata = (_EMPTY_METADATA 244 if metadata is None else 245 types.MappingProxyType(metadata)) 246 self._field_type = None 247 248 def __repr__(self): 249 return ('Field(' 250 f'name={self.name!r},' 251 f'type={self.type!r},' 252 f'default={self.default!r},' 253 f'default_factory={self.default_factory!r},' 254 f'init={self.init!r},' 255 f'repr={self.repr!r},' 256 f'hash={self.hash!r},' 257 f'compare={self.compare!r},' 258 f'metadata={self.metadata!r},' 259 f'_field_type={self._field_type}' 260 ')') 261 262 # This is used to support the PEP 487 __set_name__ protocol in the 263 # case where we're using a field that contains a descriptor as a 264 # default value. For details on __set_name__, see 265 # https://www.python.org/dev/peps/pep-0487/#implementation-details. 266 # 267 # Note that in _process_class, this Field object is overwritten 268 # with the default value, so the end result is a descriptor that 269 # had __set_name__ called on it at the right time. 270 def __set_name__(self, owner, name): 271 func = getattr(type(self.default), '__set_name__', None) 272 if func: 273 # There is a __set_name__ method on the descriptor, call 274 # it. 
275 func(self.default, owner, name) 276 277 278class _DataclassParams: 279 __slots__ = ('init', 280 'repr', 281 'eq', 282 'order', 283 'unsafe_hash', 284 'frozen', 285 ) 286 287 def __init__(self, init, repr, eq, order, unsafe_hash, frozen): 288 self.init = init 289 self.repr = repr 290 self.eq = eq 291 self.order = order 292 self.unsafe_hash = unsafe_hash 293 self.frozen = frozen 294 295 def __repr__(self): 296 return ('_DataclassParams(' 297 f'init={self.init!r},' 298 f'repr={self.repr!r},' 299 f'eq={self.eq!r},' 300 f'order={self.order!r},' 301 f'unsafe_hash={self.unsafe_hash!r},' 302 f'frozen={self.frozen!r}' 303 ')') 304 305 306# This function is used instead of exposing Field creation directly, 307# so that a type checker can be told (via overloads) that this is a 308# function whose type depends on its parameters. 309def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True, 310 hash=None, compare=True, metadata=None): 311 """Return an object to identify dataclass fields. 312 313 default is the default value of the field. default_factory is a 314 0-argument function called to initialize a field's value. If init 315 is True, the field will be a parameter to the class's __init__() 316 function. If repr is True, the field will be included in the 317 object's repr(). If hash is True, the field will be included in 318 the object's hash(). If compare is True, the field will be used 319 in comparison functions. metadata, if specified, must be a 320 mapping which is stored but not otherwise examined by dataclass. 321 322 It is an error to specify both default and default_factory. 323 """ 324 325 if default is not MISSING and default_factory is not MISSING: 326 raise ValueError('cannot specify both default and default_factory') 327 return Field(default, default_factory, init, repr, hash, compare, 328 metadata) 329 330 331def _tuple_str(obj_name, fields): 332 # Return a string representing each field of obj_name as a tuple 333 # member. 
So, if fields is ['x', 'y'] and obj_name is "self", 334 # return "(self.x,self.y)". 335 336 # Special case for the 0-tuple. 337 if not fields: 338 return '()' 339 # Note the trailing comma, needed if this turns out to be a 1-tuple. 340 return f'({",".join([f"{obj_name}.{f.name}" for f in fields])},)' 341 342 343# This function's logic is copied from "recursive_repr" function in 344# reprlib module to avoid dependency. 345def _recursive_repr(user_function): 346 # Decorator to make a repr function return "..." for a recursive 347 # call. 348 repr_running = set() 349 350 @functools.wraps(user_function) 351 def wrapper(self): 352 key = id(self), _thread.get_ident() 353 if key in repr_running: 354 return '...' 355 repr_running.add(key) 356 try: 357 result = user_function(self) 358 finally: 359 repr_running.discard(key) 360 return result 361 return wrapper 362 363 364def _create_fn(name, args, body, *, globals=None, locals=None, 365 return_type=MISSING): 366 # Note that we mutate locals when exec() is called. Caller 367 # beware! The only callers are internal to this module, so no 368 # worries about external callers. 369 if locals is None: 370 locals = {} 371 # __builtins__ may be the "builtins" module or 372 # the value of its "__dict__", 373 # so make sure "__builtins__" is the module. 374 if globals is not None and '__builtins__' not in globals: 375 globals['__builtins__'] = builtins 376 return_annotation = '' 377 if return_type is not MISSING: 378 locals['_return_type'] = return_type 379 return_annotation = '->_return_type' 380 args = ','.join(args) 381 body = '\n'.join(f' {b}' for b in body) 382 383 # Compute the text of the entire function. 384 txt = f'def {name}({args}){return_annotation}:\n{body}' 385 386 exec(txt, globals, locals) 387 return locals[name] 388 389 390def _field_assign(frozen, name, value, self_name): 391 # If we're a frozen class, then assign to our fields in __init__ 392 # via object.__setattr__. Otherwise, just use a simple 393 # assignment. 
394 # 395 # self_name is what "self" is called in this function: don't 396 # hard-code "self", since that might be a field name. 397 if frozen: 398 return f'__builtins__.object.__setattr__({self_name},{name!r},{value})' 399 return f'{self_name}.{name}={value}' 400 401 402def _field_init(f, frozen, globals, self_name): 403 # Return the text of the line in the body of __init__ that will 404 # initialize this field. 405 406 default_name = f'_dflt_{f.name}' 407 if f.default_factory is not MISSING: 408 if f.init: 409 # This field has a default factory. If a parameter is 410 # given, use it. If not, call the factory. 411 globals[default_name] = f.default_factory 412 value = (f'{default_name}() ' 413 f'if {f.name} is _HAS_DEFAULT_FACTORY ' 414 f'else {f.name}') 415 else: 416 # This is a field that's not in the __init__ params, but 417 # has a default factory function. It needs to be 418 # initialized here by calling the factory function, 419 # because there's no other way to initialize it. 420 421 # For a field initialized with a default=defaultvalue, the 422 # class dict just has the default value 423 # (cls.fieldname=defaultvalue). But that won't work for a 424 # default factory, the factory must be called in __init__ 425 # and we must assign that to self.fieldname. We can't 426 # fall back to the class dict's value, both because it's 427 # not set, and because it might be different per-class 428 # (which, after all, is why we have a factory function!). 429 430 globals[default_name] = f.default_factory 431 value = f'{default_name}()' 432 else: 433 # No default factory. 434 if f.init: 435 if f.default is MISSING: 436 # There's no default, just do an assignment. 437 value = f.name 438 elif f.default is not MISSING: 439 globals[default_name] = f.default 440 value = f.name 441 else: 442 # This field does not need initialization. Signify that 443 # to the caller by returning None. 
444 return None 445 446 # Only test this now, so that we can create variables for the 447 # default. However, return None to signify that we're not going 448 # to actually do the assignment statement for InitVars. 449 if f._field_type is _FIELD_INITVAR: 450 return None 451 452 # Now, actually generate the field assignment. 453 return _field_assign(frozen, f.name, value, self_name) 454 455 456def _init_param(f): 457 # Return the __init__ parameter string for this field. For 458 # example, the equivalent of 'x:int=3' (except instead of 'int', 459 # reference a variable set to int, and instead of '3', reference a 460 # variable set to 3). 461 if f.default is MISSING and f.default_factory is MISSING: 462 # There's no default, and no default_factory, just output the 463 # variable name and type. 464 default = '' 465 elif f.default is not MISSING: 466 # There's a default, this will be the name that's used to look 467 # it up. 468 default = f'=_dflt_{f.name}' 469 elif f.default_factory is not MISSING: 470 # There's a factory function. Set a marker. 471 default = '=_HAS_DEFAULT_FACTORY' 472 return f'{f.name}:_type_{f.name}{default}' 473 474 475def _init_fn(fields, frozen, has_post_init, self_name): 476 # fields contains both real fields and InitVar pseudo-fields. 477 478 # Make sure we don't have fields without defaults following fields 479 # with defaults. This actually would be caught when exec-ing the 480 # function source code, but catching it here gives a better error 481 # message, and future-proofs us in case we build up the function 482 # using ast. 483 seen_default = False 484 for f in fields: 485 # Only consider fields in the __init__ call. 
486 if f.init: 487 if not (f.default is MISSING and f.default_factory is MISSING): 488 seen_default = True 489 elif seen_default: 490 raise TypeError(f'non-default argument {f.name!r} ' 491 'follows default argument') 492 493 globals = {'MISSING': MISSING, 494 '_HAS_DEFAULT_FACTORY': _HAS_DEFAULT_FACTORY} 495 496 body_lines = [] 497 for f in fields: 498 line = _field_init(f, frozen, globals, self_name) 499 # line is None means that this field doesn't require 500 # initialization (it's a pseudo-field). Just skip it. 501 if line: 502 body_lines.append(line) 503 504 # Does this class have a post-init function? 505 if has_post_init: 506 params_str = ','.join(f.name for f in fields 507 if f._field_type is _FIELD_INITVAR) 508 body_lines.append(f'{self_name}.{_POST_INIT_NAME}({params_str})') 509 510 # If no body lines, use 'pass'. 511 if not body_lines: 512 body_lines = ['pass'] 513 514 locals = {f'_type_{f.name}': f.type for f in fields} 515 return _create_fn('__init__', 516 [self_name] + [_init_param(f) for f in fields if f.init], 517 body_lines, 518 locals=locals, 519 globals=globals, 520 return_type=None) 521 522 523def _repr_fn(fields): 524 fn = _create_fn('__repr__', 525 ('self',), 526 ['return self.__class__.__qualname__ + f"(' + 527 ', '.join([f"{f.name}={{self.{f.name}!r}}" 528 for f in fields]) + 529 ')"']) 530 return _recursive_repr(fn) 531 532 533def _frozen_get_del_attr(cls, fields): 534 # XXX: globals is modified on the first call to _create_fn, then 535 # the modified version is used in the second call. Is this okay? 536 globals = {'cls': cls, 537 'FrozenInstanceError': FrozenInstanceError} 538 if fields: 539 fields_str = '(' + ','.join(repr(f.name) for f in fields) + ',)' 540 else: 541 # Special case for the zero-length tuple. 
542 fields_str = '()' 543 return (_create_fn('__setattr__', 544 ('self', 'name', 'value'), 545 (f'if type(self) is cls or name in {fields_str}:', 546 ' raise FrozenInstanceError(f"cannot assign to field {name!r}")', 547 f'super(cls, self).__setattr__(name, value)'), 548 globals=globals), 549 _create_fn('__delattr__', 550 ('self', 'name'), 551 (f'if type(self) is cls or name in {fields_str}:', 552 ' raise FrozenInstanceError(f"cannot delete field {name!r}")', 553 f'super(cls, self).__delattr__(name)'), 554 globals=globals), 555 ) 556 557 558def _cmp_fn(name, op, self_tuple, other_tuple): 559 # Create a comparison function. If the fields in the object are 560 # named 'x' and 'y', then self_tuple is the string 561 # '(self.x,self.y)' and other_tuple is the string 562 # '(other.x,other.y)'. 563 564 return _create_fn(name, 565 ('self', 'other'), 566 [ 'if other.__class__ is self.__class__:', 567 f' return {self_tuple}{op}{other_tuple}', 568 'return NotImplemented']) 569 570 571def _hash_fn(fields): 572 self_tuple = _tuple_str('self', fields) 573 return _create_fn('__hash__', 574 ('self',), 575 [f'return hash({self_tuple})']) 576 577 578def _is_classvar(a_type, typing): 579 # This test uses a typing internal class, but it's the best way to 580 # test if this is a ClassVar. 581 return (a_type is typing.ClassVar 582 or (type(a_type) is typing._GenericAlias 583 and a_type.__origin__ is typing.ClassVar)) 584 585 586def _is_initvar(a_type, dataclasses): 587 # The module we're checking against is the module we're 588 # currently in (dataclasses.py). 589 return a_type is dataclasses.InitVar 590 591 592def _is_type(annotation, cls, a_module, a_type, is_type_predicate): 593 # Given a type annotation string, does it refer to a_type in 594 # a_module? For example, when checking that annotation denotes a 595 # ClassVar, then a_module is typing, and a_type is 596 # typing.ClassVar. 
597 598 # It's possible to look up a_module given a_type, but it involves 599 # looking in sys.modules (again!), and seems like a waste since 600 # the caller already knows a_module. 601 602 # - annotation is a string type annotation 603 # - cls is the class that this annotation was found in 604 # - a_module is the module we want to match 605 # - a_type is the type in that module we want to match 606 # - is_type_predicate is a function called with (obj, a_module) 607 # that determines if obj is of the desired type. 608 609 # Since this test does not do a local namespace lookup (and 610 # instead only a module (global) lookup), there are some things it 611 # gets wrong. 612 613 # With string annotations, cv0 will be detected as a ClassVar: 614 # CV = ClassVar 615 # @dataclass 616 # class C0: 617 # cv0: CV 618 619 # But in this example cv1 will not be detected as a ClassVar: 620 # @dataclass 621 # class C1: 622 # CV = ClassVar 623 # cv1: CV 624 625 # In C1, the code in this function (_is_type) will look up "CV" in 626 # the module and not find it, so it will not consider cv1 as a 627 # ClassVar. This is a fairly obscure corner case, and the best 628 # way to fix it would be to eval() the string "CV" with the 629 # correct global and local namespaces. However that would involve 630 # a eval() penalty for every single field of every dataclass 631 # that's defined. It was judged not worth it. 632 633 match = _MODULE_IDENTIFIER_RE.match(annotation) 634 if match: 635 ns = None 636 module_name = match.group(1) 637 if not module_name: 638 # No module name, assume the class's module did 639 # "from dataclasses import InitVar". 640 ns = sys.modules.get(cls.__module__).__dict__ 641 else: 642 # Look up module_name in the class's module. 
643 module = sys.modules.get(cls.__module__) 644 if module and module.__dict__.get(module_name) is a_module: 645 ns = sys.modules.get(a_type.__module__).__dict__ 646 if ns and is_type_predicate(ns.get(match.group(2)), a_module): 647 return True 648 return False 649 650 651def _get_field(cls, a_name, a_type): 652 # Return a Field object for this field name and type. ClassVars 653 # and InitVars are also returned, but marked as such (see 654 # f._field_type). 655 656 # If the default value isn't derived from Field, then it's only a 657 # normal default value. Convert it to a Field(). 658 default = getattr(cls, a_name, MISSING) 659 if isinstance(default, Field): 660 f = default 661 else: 662 if isinstance(default, types.MemberDescriptorType): 663 # This is a field in __slots__, so it has no default value. 664 default = MISSING 665 f = field(default=default) 666 667 # Only at this point do we know the name and the type. Set them. 668 f.name = a_name 669 f.type = a_type 670 671 # Assume it's a normal field until proven otherwise. We're next 672 # going to decide if it's a ClassVar or InitVar, everything else 673 # is just a normal field. 674 f._field_type = _FIELD 675 676 # In addition to checking for actual types here, also check for 677 # string annotations. get_type_hints() won't always work for us 678 # (see https://github.com/python/typing/issues/508 for example), 679 # plus it's expensive and would require an eval for every stirng 680 # annotation. So, make a best effort to see if this is a ClassVar 681 # or InitVar using regex's and checking that the thing referenced 682 # is actually of the correct type. 683 684 # For the complete discussion, see https://bugs.python.org/issue33453 685 686 # If typing has not been imported, then it's impossible for any 687 # annotation to be a ClassVar. So, only look for ClassVar if 688 # typing has been imported by any module (not necessarily cls's 689 # module). 
690 typing = sys.modules.get('typing') 691 if typing: 692 if (_is_classvar(a_type, typing) 693 or (isinstance(f.type, str) 694 and _is_type(f.type, cls, typing, typing.ClassVar, 695 _is_classvar))): 696 f._field_type = _FIELD_CLASSVAR 697 698 # If the type is InitVar, or if it's a matching string annotation, 699 # then it's an InitVar. 700 if f._field_type is _FIELD: 701 # The module we're checking against is the module we're 702 # currently in (dataclasses.py). 703 dataclasses = sys.modules[__name__] 704 if (_is_initvar(a_type, dataclasses) 705 or (isinstance(f.type, str) 706 and _is_type(f.type, cls, dataclasses, dataclasses.InitVar, 707 _is_initvar))): 708 f._field_type = _FIELD_INITVAR 709 710 # Validations for individual fields. This is delayed until now, 711 # instead of in the Field() constructor, since only here do we 712 # know the field name, which allows for better error reporting. 713 714 # Special restrictions for ClassVar and InitVar. 715 if f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR): 716 if f.default_factory is not MISSING: 717 raise TypeError(f'field {f.name} cannot have a ' 718 'default factory') 719 # Should I check for other field settings? default_factory 720 # seems the most serious to check for. Maybe add others. For 721 # example, how about init=False (or really, 722 # init=<not-the-default-init-value>)? It makes no sense for 723 # ClassVar and InitVar to specify init=<anything>. 724 725 # For real fields, disallow mutable defaults for known types. 726 if f._field_type is _FIELD and isinstance(f.default, (list, dict, set)): 727 raise ValueError(f'mutable default {type(f.default)} for field ' 728 f'{f.name} is not allowed: use default_factory') 729 730 return f 731 732 733def _set_new_attribute(cls, name, value): 734 # Never overwrites an existing attribute. Returns True if the 735 # attribute already exists. 
736 if name in cls.__dict__: 737 return True 738 setattr(cls, name, value) 739 return False 740 741 742# Decide if/how we're going to create a hash function. Key is 743# (unsafe_hash, eq, frozen, does-hash-exist). Value is the action to 744# take. The common case is to do nothing, so instead of providing a 745# function that is a no-op, use None to signify that. 746 747def _hash_set_none(cls, fields): 748 return None 749 750def _hash_add(cls, fields): 751 flds = [f for f in fields if (f.compare if f.hash is None else f.hash)] 752 return _hash_fn(flds) 753 754def _hash_exception(cls, fields): 755 # Raise an exception. 756 raise TypeError(f'Cannot overwrite attribute __hash__ ' 757 f'in class {cls.__name__}') 758 759# 760# +-------------------------------------- unsafe_hash? 761# | +------------------------------- eq? 762# | | +------------------------ frozen? 763# | | | +---------------- has-explicit-hash? 764# | | | | 765# | | | | +------- action 766# | | | | | 767# v v v v v 768_hash_action = {(False, False, False, False): None, 769 (False, False, False, True ): None, 770 (False, False, True, False): None, 771 (False, False, True, True ): None, 772 (False, True, False, False): _hash_set_none, 773 (False, True, False, True ): None, 774 (False, True, True, False): _hash_add, 775 (False, True, True, True ): None, 776 (True, False, False, False): _hash_add, 777 (True, False, False, True ): _hash_exception, 778 (True, False, True, False): _hash_add, 779 (True, False, True, True ): _hash_exception, 780 (True, True, False, False): _hash_add, 781 (True, True, False, True ): _hash_exception, 782 (True, True, True, False): _hash_add, 783 (True, True, True, True ): _hash_exception, 784 } 785# See https://bugs.python.org/issue32929#msg312829 for an if-statement 786# version of this table. 787 788 789def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen): 790 # Now that dicts retain insertion order, there's no reason to use 791 # an ordered dict. 
I am leveraging that ordering here, because 792 # derived class fields overwrite base class fields, but the order 793 # is defined by the base class, which is found first. 794 fields = {} 795 796 setattr(cls, _PARAMS, _DataclassParams(init, repr, eq, order, 797 unsafe_hash, frozen)) 798 799 # Find our base classes in reverse MRO order, and exclude 800 # ourselves. In reversed order so that more derived classes 801 # override earlier field definitions in base classes. As long as 802 # we're iterating over them, see if any are frozen. 803 any_frozen_base = False 804 has_dataclass_bases = False 805 for b in cls.__mro__[-1:0:-1]: 806 # Only process classes that have been processed by our 807 # decorator. That is, they have a _FIELDS attribute. 808 base_fields = getattr(b, _FIELDS, None) 809 if base_fields: 810 has_dataclass_bases = True 811 for f in base_fields.values(): 812 fields[f.name] = f 813 if getattr(b, _PARAMS).frozen: 814 any_frozen_base = True 815 816 # Annotations that are defined in this class (not in base 817 # classes). If __annotations__ isn't present, then this class 818 # adds no new annotations. We use this to compute fields that are 819 # added by this class. 820 # 821 # Fields are found from cls_annotations, which is guaranteed to be 822 # ordered. Default values are from class attributes, if a field 823 # has a default. If the default value is a Field(), then it 824 # contains additional info beyond (and possibly including) the 825 # actual default value. Pseudo-fields ClassVars and InitVars are 826 # included, despite the fact that they're not real fields. That's 827 # dealt with later. 828 cls_annotations = cls.__dict__.get('__annotations__', {}) 829 830 # Now find fields in our class. While doing so, validate some 831 # things, and set the default values (as class attributes) where 832 # we can. 
833 cls_fields = [_get_field(cls, name, type) 834 for name, type in cls_annotations.items()] 835 for f in cls_fields: 836 fields[f.name] = f 837 838 # If the class attribute (which is the default value for this 839 # field) exists and is of type 'Field', replace it with the 840 # real default. This is so that normal class introspection 841 # sees a real default value, not a Field. 842 if isinstance(getattr(cls, f.name, None), Field): 843 if f.default is MISSING: 844 # If there's no default, delete the class attribute. 845 # This happens if we specify field(repr=False), for 846 # example (that is, we specified a field object, but 847 # no default value). Also if we're using a default 848 # factory. The class attribute should not be set at 849 # all in the post-processed class. 850 delattr(cls, f.name) 851 else: 852 setattr(cls, f.name, f.default) 853 854 # Do we have any Field members that don't also have annotations? 855 for name, value in cls.__dict__.items(): 856 if isinstance(value, Field) and not name in cls_annotations: 857 raise TypeError(f'{name!r} is a field but has no type annotation') 858 859 # Check rules that apply if we are derived from any dataclasses. 860 if has_dataclass_bases: 861 # Raise an exception if any of our bases are frozen, but we're not. 862 if any_frozen_base and not frozen: 863 raise TypeError('cannot inherit non-frozen dataclass from a ' 864 'frozen one') 865 866 # Raise an exception if we're frozen, but none of our bases are. 867 if not any_frozen_base and frozen: 868 raise TypeError('cannot inherit frozen dataclass from a ' 869 'non-frozen one') 870 871 # Remember all of the fields on our class (including bases). This 872 # also marks this class as being a dataclass. 873 setattr(cls, _FIELDS, fields) 874 875 # Was this class defined with an explicit __hash__? Note that if 876 # __eq__ is defined in this class, then python will automatically 877 # set __hash__ to None. 
This is a heuristic, as it's possible 878 # that such a __hash__ == None was not auto-generated, but it 879 # close enough. 880 class_hash = cls.__dict__.get('__hash__', MISSING) 881 has_explicit_hash = not (class_hash is MISSING or 882 (class_hash is None and '__eq__' in cls.__dict__)) 883 884 # If we're generating ordering methods, we must be generating the 885 # eq methods. 886 if order and not eq: 887 raise ValueError('eq must be true if order is true') 888 889 if init: 890 # Does this class have a post-init function? 891 has_post_init = hasattr(cls, _POST_INIT_NAME) 892 893 # Include InitVars and regular fields (so, not ClassVars). 894 flds = [f for f in fields.values() 895 if f._field_type in (_FIELD, _FIELD_INITVAR)] 896 _set_new_attribute(cls, '__init__', 897 _init_fn(flds, 898 frozen, 899 has_post_init, 900 # The name to use for the "self" 901 # param in __init__. Use "self" 902 # if possible. 903 '__dataclass_self__' if 'self' in fields 904 else 'self', 905 )) 906 907 # Get the fields as a list, and include only real fields. This is 908 # used in all of the following methods. 909 field_list = [f for f in fields.values() if f._field_type is _FIELD] 910 911 if repr: 912 flds = [f for f in field_list if f.repr] 913 _set_new_attribute(cls, '__repr__', _repr_fn(flds)) 914 915 if eq: 916 # Create _eq__ method. There's no need for a __ne__ method, 917 # since python will call __eq__ and negate it. 918 flds = [f for f in field_list if f.compare] 919 self_tuple = _tuple_str('self', flds) 920 other_tuple = _tuple_str('other', flds) 921 _set_new_attribute(cls, '__eq__', 922 _cmp_fn('__eq__', '==', 923 self_tuple, other_tuple)) 924 925 if order: 926 # Create and set the ordering methods. 
927 flds = [f for f in field_list if f.compare] 928 self_tuple = _tuple_str('self', flds) 929 other_tuple = _tuple_str('other', flds) 930 for name, op in [('__lt__', '<'), 931 ('__le__', '<='), 932 ('__gt__', '>'), 933 ('__ge__', '>='), 934 ]: 935 if _set_new_attribute(cls, name, 936 _cmp_fn(name, op, self_tuple, other_tuple)): 937 raise TypeError(f'Cannot overwrite attribute {name} ' 938 f'in class {cls.__name__}. Consider using ' 939 'functools.total_ordering') 940 941 if frozen: 942 for fn in _frozen_get_del_attr(cls, field_list): 943 if _set_new_attribute(cls, fn.__name__, fn): 944 raise TypeError(f'Cannot overwrite attribute {fn.__name__} ' 945 f'in class {cls.__name__}') 946 947 # Decide if/how we're going to create a hash function. 948 hash_action = _hash_action[bool(unsafe_hash), 949 bool(eq), 950 bool(frozen), 951 has_explicit_hash] 952 if hash_action: 953 # No need to call _set_new_attribute here, since by the time 954 # we're here the overwriting is unconditional. 955 cls.__hash__ = hash_action(cls, field_list) 956 957 if not getattr(cls, '__doc__'): 958 # Create a class doc-string. 959 cls.__doc__ = (cls.__name__ + 960 str(inspect.signature(cls)).replace(' -> None', '')) 961 962 return cls 963 964 965# _cls should never be specified by keyword, so start it with an 966# underscore. The presence of _cls is used to detect if this 967# decorator is being called with parameters or not. 968def dataclass(_cls=None, *, init=True, repr=True, eq=True, order=False, 969 unsafe_hash=False, frozen=False): 970 """Returns the same class as was passed in, with dunder methods 971 added based on the fields defined in the class. 972 973 Examines PEP 526 __annotations__ to determine fields. 974 975 If init is true, an __init__() method is added to the class. If 976 repr is true, a __repr__() method is added. If order is true, rich 977 comparison dunder methods are added. If unsafe_hash is true, a 978 __hash__() method function is added. 
If frozen is true, fields may 979 not be assigned to after instance creation. 980 """ 981 982 def wrap(cls): 983 return _process_class(cls, init, repr, eq, order, unsafe_hash, frozen) 984 985 # See if we're being called as @dataclass or @dataclass(). 986 if _cls is None: 987 # We're called with parens. 988 return wrap 989 990 # We're called as @dataclass without parens. 991 return wrap(_cls) 992 993 994def fields(class_or_instance): 995 """Return a tuple describing the fields of this dataclass. 996 997 Accepts a dataclass or an instance of one. Tuple elements are of 998 type Field. 999 """ 1000 1001 # Might it be worth caching this, per class? 1002 try: 1003 fields = getattr(class_or_instance, _FIELDS) 1004 except AttributeError: 1005 raise TypeError('must be called with a dataclass type or instance') 1006 1007 # Exclude pseudo-fields. Note that fields is sorted by insertion 1008 # order, so the order of the tuple is as the fields were defined. 1009 return tuple(f for f in fields.values() if f._field_type is _FIELD) 1010 1011 1012def _is_dataclass_instance(obj): 1013 """Returns True if obj is an instance of a dataclass.""" 1014 return not isinstance(obj, type) and hasattr(obj, _FIELDS) 1015 1016 1017def is_dataclass(obj): 1018 """Returns True if obj is a dataclass or an instance of a 1019 dataclass.""" 1020 return hasattr(obj, _FIELDS) 1021 1022 1023def asdict(obj, *, dict_factory=dict): 1024 """Return the fields of a dataclass instance as a new dictionary mapping 1025 field names to field values. 1026 1027 Example usage: 1028 1029 @dataclass 1030 class C: 1031 x: int 1032 y: int 1033 1034 c = C(1, 2) 1035 assert asdict(c) == {'x': 1, 'y': 2} 1036 1037 If given, 'dict_factory' will be used instead of built-in dict. 1038 The function applies recursively to field values that are 1039 dataclass instances. This will also look into built-in containers: 1040 tuples, lists, and dicts. 
1041 """ 1042 if not _is_dataclass_instance(obj): 1043 raise TypeError("asdict() should be called on dataclass instances") 1044 return _asdict_inner(obj, dict_factory) 1045 1046 1047def _asdict_inner(obj, dict_factory): 1048 if _is_dataclass_instance(obj): 1049 result = [] 1050 for f in fields(obj): 1051 value = _asdict_inner(getattr(obj, f.name), dict_factory) 1052 result.append((f.name, value)) 1053 return dict_factory(result) 1054 elif isinstance(obj, tuple) and hasattr(obj, '_fields'): 1055 # obj is a namedtuple. Recurse into it, but the returned 1056 # object is another namedtuple of the same type. This is 1057 # similar to how other list- or tuple-derived classes are 1058 # treated (see below), but we just need to create them 1059 # differently because a namedtuple's __init__ needs to be 1060 # called differently (see bpo-34363). 1061 1062 # I'm not using namedtuple's _asdict() 1063 # method, because: 1064 # - it does not recurse in to the namedtuple fields and 1065 # convert them to dicts (using dict_factory). 1066 # - I don't actually want to return a dict here. The the main 1067 # use case here is json.dumps, and it handles converting 1068 # namedtuples to lists. Admittedly we're losing some 1069 # information here when we produce a json list instead of a 1070 # dict. Note that if we returned dicts here instead of 1071 # namedtuples, we could no longer call asdict() on a data 1072 # structure where a namedtuple was used as a dict key. 1073 1074 return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj]) 1075 elif isinstance(obj, (list, tuple)): 1076 # Assume we can create an object of this type by passing in a 1077 # generator (which is not true for namedtuples, handled 1078 # above). 
1079 return type(obj)(_asdict_inner(v, dict_factory) for v in obj) 1080 elif isinstance(obj, dict): 1081 return type(obj)((_asdict_inner(k, dict_factory), 1082 _asdict_inner(v, dict_factory)) 1083 for k, v in obj.items()) 1084 else: 1085 return copy.deepcopy(obj) 1086 1087 1088def astuple(obj, *, tuple_factory=tuple): 1089 """Return the fields of a dataclass instance as a new tuple of field values. 1090 1091 Example usage:: 1092 1093 @dataclass 1094 class C: 1095 x: int 1096 y: int 1097 1098 c = C(1, 2) 1099 assert astuple(c) == (1, 2) 1100 1101 If given, 'tuple_factory' will be used instead of built-in tuple. 1102 The function applies recursively to field values that are 1103 dataclass instances. This will also look into built-in containers: 1104 tuples, lists, and dicts. 1105 """ 1106 1107 if not _is_dataclass_instance(obj): 1108 raise TypeError("astuple() should be called on dataclass instances") 1109 return _astuple_inner(obj, tuple_factory) 1110 1111 1112def _astuple_inner(obj, tuple_factory): 1113 if _is_dataclass_instance(obj): 1114 result = [] 1115 for f in fields(obj): 1116 value = _astuple_inner(getattr(obj, f.name), tuple_factory) 1117 result.append(value) 1118 return tuple_factory(result) 1119 elif isinstance(obj, tuple) and hasattr(obj, '_fields'): 1120 # obj is a namedtuple. Recurse into it, but the returned 1121 # object is another namedtuple of the same type. This is 1122 # similar to how other list- or tuple-derived classes are 1123 # treated (see below), but we just need to create them 1124 # differently because a namedtuple's __init__ needs to be 1125 # called differently (see bpo-34363). 1126 return type(obj)(*[_astuple_inner(v, tuple_factory) for v in obj]) 1127 elif isinstance(obj, (list, tuple)): 1128 # Assume we can create an object of this type by passing in a 1129 # generator (which is not true for namedtuples, handled 1130 # above). 
1131 return type(obj)(_astuple_inner(v, tuple_factory) for v in obj) 1132 elif isinstance(obj, dict): 1133 return type(obj)((_astuple_inner(k, tuple_factory), _astuple_inner(v, tuple_factory)) 1134 for k, v in obj.items()) 1135 else: 1136 return copy.deepcopy(obj) 1137 1138 1139def make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True, 1140 repr=True, eq=True, order=False, unsafe_hash=False, 1141 frozen=False): 1142 """Return a new dynamically created dataclass. 1143 1144 The dataclass name will be 'cls_name'. 'fields' is an iterable 1145 of either (name), (name, type) or (name, type, Field) objects. If type is 1146 omitted, use the string 'typing.Any'. Field objects are created by 1147 the equivalent of calling 'field(name, type [, Field-info])'. 1148 1149 C = make_dataclass('C', ['x', ('y', int), ('z', int, field(init=False))], bases=(Base,)) 1150 1151 is equivalent to: 1152 1153 @dataclass 1154 class C(Base): 1155 x: 'typing.Any' 1156 y: int 1157 z: int = field(init=False) 1158 1159 For the bases and namespace parameters, see the builtin type() function. 1160 1161 The parameters init, repr, eq, order, unsafe_hash, and frozen are passed to 1162 dataclass(). 1163 """ 1164 1165 if namespace is None: 1166 namespace = {} 1167 else: 1168 # Copy namespace since we're going to mutate it. 1169 namespace = namespace.copy() 1170 1171 # While we're looking through the field names, validate that they 1172 # are identifiers, are not keywords, and not duplicates. 
1173 seen = set() 1174 anns = {} 1175 for item in fields: 1176 if isinstance(item, str): 1177 name = item 1178 tp = 'typing.Any' 1179 elif len(item) == 2: 1180 name, tp, = item 1181 elif len(item) == 3: 1182 name, tp, spec = item 1183 namespace[name] = spec 1184 else: 1185 raise TypeError(f'Invalid field: {item!r}') 1186 1187 if not isinstance(name, str) or not name.isidentifier(): 1188 raise TypeError(f'Field names must be valid identifers: {name!r}') 1189 if keyword.iskeyword(name): 1190 raise TypeError(f'Field names must not be keywords: {name!r}') 1191 if name in seen: 1192 raise TypeError(f'Field name duplicated: {name!r}') 1193 1194 seen.add(name) 1195 anns[name] = tp 1196 1197 namespace['__annotations__'] = anns 1198 # We use `types.new_class()` instead of simply `type()` to allow dynamic creation 1199 # of generic dataclassses. 1200 cls = types.new_class(cls_name, bases, {}, lambda ns: ns.update(namespace)) 1201 return dataclass(cls, init=init, repr=repr, eq=eq, order=order, 1202 unsafe_hash=unsafe_hash, frozen=frozen) 1203 1204 1205def replace(obj, **changes): 1206 """Return a new object replacing specified fields with new values. 1207 1208 This is especially useful for frozen classes. Example usage: 1209 1210 @dataclass(frozen=True) 1211 class C: 1212 x: int 1213 y: int 1214 1215 c = C(1, 2) 1216 c1 = replace(c, x=3) 1217 assert c1.x == 3 and c1.y == 2 1218 """ 1219 1220 # We're going to mutate 'changes', but that's okay because it's a 1221 # new dict, even if called with 'replace(obj, **my_changes)'. 1222 1223 if not _is_dataclass_instance(obj): 1224 raise TypeError("replace() should be called on dataclass instances") 1225 1226 # It's an error to have init=False fields in 'changes'. 1227 # If a field is not in 'changes', read its value from the provided obj. 1228 1229 for f in getattr(obj, _FIELDS).values(): 1230 # Only consider normal fields or InitVars. 
1231 if f._field_type is _FIELD_CLASSVAR: 1232 continue 1233 1234 if not f.init: 1235 # Error if this field is specified in changes. 1236 if f.name in changes: 1237 raise ValueError(f'field {f.name} is declared with ' 1238 'init=False, it cannot be specified with ' 1239 'replace()') 1240 continue 1241 1242 if f.name not in changes: 1243 if f._field_type is _FIELD_INITVAR: 1244 raise ValueError(f"InitVar {f.name!r} " 1245 'must be specified with replace()') 1246 changes[f.name] = getattr(obj, f.name) 1247 1248 # Create the new object, which calls __init__() and 1249 # __post_init__() (if defined), using all of the init fields we've 1250 # added and/or left in 'changes'. If there are values supplied in 1251 # changes that aren't fields, this will correctly raise a 1252 # TypeError. 1253 return obj.__class__(**changes) 1254