• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import re
2import sys
3import copy
4import types
5import inspect
6import keyword
7import builtins
8import functools
9import _thread
10
11
# Names exported by a star-import of this module.
__all__ = [
    'dataclass',
    'field',
    'Field',
    'FrozenInstanceError',
    'InitVar',
    'MISSING',

    # Helper functions.
    'fields',
    'asdict',
    'astuple',
    'make_dataclass',
    'replace',
    'is_dataclass',
]
27
28# Conditions for adding methods.  The boxes indicate what action the
29# dataclass decorator takes.  For all of these tables, when I talk
30# about init=, repr=, eq=, order=, unsafe_hash=, or frozen=, I'm
31# referring to the arguments to the @dataclass decorator.  When
32# checking if a dunder method already exists, I mean check for an
33# entry in the class's __dict__.  I never check to see if an attribute
34# is defined in a base class.
35
36# Key:
37# +=========+=========================================+
# | Value   | Meaning                                 |
39# +=========+=========================================+
40# | <blank> | No action: no method is added.          |
41# +---------+-----------------------------------------+
42# | add     | Generated method is added.              |
43# +---------+-----------------------------------------+
44# | raise   | TypeError is raised.                    |
45# +---------+-----------------------------------------+
46# | None    | Attribute is set to None.               |
47# +=========+=========================================+
48
49# __init__
50#
51#   +--- init= parameter
52#   |
53#   v     |       |       |
54#         |  no   |  yes  |  <--- class has __init__ in __dict__?
55# +=======+=======+=======+
56# | False |       |       |
57# +-------+-------+-------+
58# | True  | add   |       |  <- the default
59# +=======+=======+=======+
60
61# __repr__
62#
63#    +--- repr= parameter
64#    |
65#    v    |       |       |
66#         |  no   |  yes  |  <--- class has __repr__ in __dict__?
67# +=======+=======+=======+
68# | False |       |       |
69# +-------+-------+-------+
70# | True  | add   |       |  <- the default
71# +=======+=======+=======+
72
73
74# __setattr__
75# __delattr__
76#
77#    +--- frozen= parameter
78#    |
79#    v    |       |       |
80#         |  no   |  yes  |  <--- class has __setattr__ or __delattr__ in __dict__?
81# +=======+=======+=======+
82# | False |       |       |  <- the default
83# +-------+-------+-------+
84# | True  | add   | raise |
85# +=======+=======+=======+
86# Raise because not adding these methods would break the "frozen-ness"
87# of the class.
88
89# __eq__
90#
91#    +--- eq= parameter
92#    |
93#    v    |       |       |
94#         |  no   |  yes  |  <--- class has __eq__ in __dict__?
95# +=======+=======+=======+
96# | False |       |       |
97# +-------+-------+-------+
98# | True  | add   |       |  <- the default
99# +=======+=======+=======+
100
101# __lt__
102# __le__
103# __gt__
104# __ge__
105#
106#    +--- order= parameter
107#    |
108#    v    |       |       |
109#         |  no   |  yes  |  <--- class has any comparison method in __dict__?
110# +=======+=======+=======+
111# | False |       |       |  <- the default
112# +-------+-------+-------+
113# | True  | add   | raise |
114# +=======+=======+=======+
115# Raise because to allow this case would interfere with using
116# functools.total_ordering.
117
118# __hash__
119
120#    +------------------- unsafe_hash= parameter
121#    |       +----------- eq= parameter
122#    |       |       +--- frozen= parameter
123#    |       |       |
124#    v       v       v    |        |        |
125#                         |   no   |  yes   |  <--- class has explicitly defined __hash__
126# +=======+=======+=======+========+========+
127# | False | False | False |        |        | No __eq__, use the base class __hash__
128# +-------+-------+-------+--------+--------+
129# | False | False | True  |        |        | No __eq__, use the base class __hash__
130# +-------+-------+-------+--------+--------+
131# | False | True  | False | None   |        | <-- the default, not hashable
132# +-------+-------+-------+--------+--------+
133# | False | True  | True  | add    |        | Frozen, so hashable, allows override
134# +-------+-------+-------+--------+--------+
135# | True  | False | False | add    | raise  | Has no __eq__, but hashable
136# +-------+-------+-------+--------+--------+
137# | True  | False | True  | add    | raise  | Has no __eq__, but hashable
138# +-------+-------+-------+--------+--------+
139# | True  | True  | False | add    | raise  | Not frozen, but hashable
140# +-------+-------+-------+--------+--------+
141# | True  | True  | True  | add    | raise  | Frozen, so hashable
142# +=======+=======+=======+========+========+
143# For boxes that are blank, __hash__ is untouched and therefore
144# inherited from the base class.  If the base is object, then
145# id-based hashing is used.
146#
147# Note that a class may already have __hash__=None if it specified an
148# __eq__ method in the class body (not one that was created by
149# @dataclass).
150#
151# See _hash_action (below) for a coded version of this table.
152
153
class FrozenInstanceError(AttributeError):
    """Raised when an attempt is made to modify a frozen class."""
156
157# A sentinel object for default values to signal that a default
158# factory will be used.  This is given a nice repr() which will appear
159# in the function signature of dataclasses' constructors.
160class _HAS_DEFAULT_FACTORY_CLASS:
161    def __repr__(self):
162        return '<factory>'
163_HAS_DEFAULT_FACTORY = _HAS_DEFAULT_FACTORY_CLASS()
164
165# A sentinel object to detect if a parameter is supplied or not.  Use
166# a class to give it a better repr.
167class _MISSING_TYPE:
168    pass
169MISSING = _MISSING_TYPE()
170
# Since most per-field metadata will be unused, create an empty
# read-only proxy that can be shared among all fields.  Field.__init__
# uses it whenever it is called with metadata=None.
_EMPTY_METADATA = types.MappingProxyType({})
174
175# Markers for the various kinds of fields and pseudo-fields.
176class _FIELD_BASE:
177    def __init__(self, name):
178        self.name = name
179    def __repr__(self):
180        return self.name
181_FIELD = _FIELD_BASE('_FIELD')
182_FIELD_CLASSVAR = _FIELD_BASE('_FIELD_CLASSVAR')
183_FIELD_INITVAR = _FIELD_BASE('_FIELD_INITVAR')
184
# The name of an attribute on the class where we store the Field
# objects.  Also used to check if a class is a Data Class.
_FIELDS = '__dataclass_fields__'

# The name of an attribute on the class that stores the parameters to
# @dataclass (as a _DataclassParams instance).
_PARAMS = '__dataclass_params__'

# The name of the function, that if it exists, is called at the end of
# __init__.
_POST_INIT_NAME = '__post_init__'

# String regex that string annotations for ClassVar or InitVar must match.
# It matches an optional "identifier." prefix (group 1, the module
# name) followed by an identifier (group 2, the type name), with
# optional whitespace around each, e.g. "typing.ClassVar[int]" or
# "InitVar".  Whatever follows the second identifier is not examined.
# See https://bugs.python.org/issue33453 for details.
_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')
201
202class _InitVarMeta(type):
203    def __getitem__(self, params):
204        return self
205
206class InitVar(metaclass=_InitVarMeta):
207    pass
208
209
# Field objects are only ever instantiated inside this module, and
# only through the field() function.  They are nevertheless visible
# to user code, which should treat them as (conceptually) read-only.
#
# The constructor cannot know the field's name or type yet, so both
# start out as None and are filled in afterwards.  By the time
# cls._FIELDS has been populated with Field objects, name and type
# are guaranteed to be set.
class Field:
    __slots__ = ('name',
                 'type',
                 'default',
                 'default_factory',
                 'repr',
                 'hash',
                 'init',
                 'compare',
                 'metadata',
                 '_field_type',  # Private: not to be used by user code.
                 )

    def __init__(self, default, default_factory, init, repr, hash, compare,
                 metadata):
        # Not known yet; filled in once the owning class is processed.
        self.name = None
        self.type = None
        self._field_type = None

        self.default = default
        self.default_factory = default_factory
        self.init = init
        self.repr = repr
        self.hash = hash
        self.compare = compare

        # Expose the metadata through a read-only view; fields without
        # metadata all share one empty mapping.
        if metadata is None:
            self.metadata = _EMPTY_METADATA
        else:
            self.metadata = types.MappingProxyType(metadata)

    def __repr__(self):
        parts = [
            f'name={self.name!r}',
            f'type={self.type!r}',
            f'default={self.default!r}',
            f'default_factory={self.default_factory!r}',
            f'init={self.init!r}',
            f'repr={self.repr!r}',
            f'hash={self.hash!r}',
            f'compare={self.compare!r}',
            f'metadata={self.metadata!r}',
            f'_field_type={self._field_type}',
        ]
        return 'Field(' + ','.join(parts) + ')'

    # Support for the PEP 487 __set_name__ protocol when the default
    # value of a field is itself a descriptor.  For details on
    # __set_name__, see
    # https://www.python.org/dev/peps/pep-0487/#implementation-details.
    #
    # _process_class later replaces this Field object with the default
    # value, so the net effect is a descriptor whose __set_name__ was
    # invoked at the right time.
    def __set_name__(self, owner, name):
        hook = getattr(type(self.default), '__set_name__', None)
        if not hook:
            return
        # The default's type implements __set_name__: forward the call.
        hook(self.default, owner, name)
276
277
278class _DataclassParams:
279    __slots__ = ('init',
280                 'repr',
281                 'eq',
282                 'order',
283                 'unsafe_hash',
284                 'frozen',
285                 )
286
287    def __init__(self, init, repr, eq, order, unsafe_hash, frozen):
288        self.init = init
289        self.repr = repr
290        self.eq = eq
291        self.order = order
292        self.unsafe_hash = unsafe_hash
293        self.frozen = frozen
294
295    def __repr__(self):
296        return ('_DataclassParams('
297                f'init={self.init!r},'
298                f'repr={self.repr!r},'
299                f'eq={self.eq!r},'
300                f'order={self.order!r},'
301                f'unsafe_hash={self.unsafe_hash!r},'
302                f'frozen={self.frozen!r}'
303                ')')
304
305
# Field objects are created through this function rather than by
# calling Field directly, so that a type checker can be told (via
# overloads) that the result type depends on the parameters.
def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True,
          hash=None, compare=True, metadata=None):
    """Return an object to identify dataclass fields.

    default is the field's default value.  default_factory is a
    function taking no arguments that is called to produce the
    field's initial value.  init controls whether the field is a
    parameter of the class's __init__() function, repr whether it
    appears in the object's repr(), hash whether it takes part in the
    object's hash(), and compare whether it is used by the comparison
    functions.  metadata, if given, must be a mapping; it is stored
    but not otherwise examined by dataclass.

    Specifying both default and default_factory raises ValueError.
    """

    both_given = default is not MISSING and default_factory is not MISSING
    if both_given:
        raise ValueError('cannot specify both default and default_factory')
    return Field(default, default_factory, init, repr, hash, compare,
                 metadata)
329
330
331def _tuple_str(obj_name, fields):
332    # Return a string representing each field of obj_name as a tuple
333    # member.  So, if fields is ['x', 'y'] and obj_name is "self",
334    # return "(self.x,self.y)".
335
336    # Special case for the 0-tuple.
337    if not fields:
338        return '()'
339    # Note the trailing comma, needed if this turns out to be a 1-tuple.
340    return f'({",".join([f"{obj_name}.{f.name}" for f in fields])},)'
341
342
343# This function's logic is copied from "recursive_repr" function in
344# reprlib module to avoid dependency.
345def _recursive_repr(user_function):
346    # Decorator to make a repr function return "..." for a recursive
347    # call.
348    repr_running = set()
349
350    @functools.wraps(user_function)
351    def wrapper(self):
352        key = id(self), _thread.get_ident()
353        if key in repr_running:
354            return '...'
355        repr_running.add(key)
356        try:
357            result = user_function(self)
358        finally:
359            repr_running.discard(key)
360        return result
361    return wrapper
362
363
def _create_fn(name, args, body, *, globals=None, locals=None,
               return_type=MISSING):
    # Build a function called `name` from source text and return it.
    # args is a sequence of parameter strings and body a sequence of
    # statement strings (one per line).
    #
    # Beware: exec() mutates locals, so a dict passed in by the caller
    # is modified.  All callers live in this module, so that is fine.
    if locals is None:
        locals = {}

    # __builtins__ may be either the "builtins" module or that
    # module's "__dict__"; make sure the generated code sees the
    # module.
    if globals is not None and '__builtins__' not in globals:
        globals['__builtins__'] = builtins

    if return_type is MISSING:
        return_annotation = ''
    else:
        # The annotation refers to a variable rather than embedding the
        # type's repr in the source.
        locals['_return_type'] = return_type
        return_annotation = '->_return_type'

    params = ','.join(args)
    # Indent every body line by one space so it sits inside the def.
    indented = '\n'.join(f' {stmt}' for stmt in body)

    # The complete source text of the function.
    source = f'def {name}({params}){return_annotation}:\n{indented}'

    exec(source, globals, locals)
    return locals[name]
388
389
390def _field_assign(frozen, name, value, self_name):
391    # If we're a frozen class, then assign to our fields in __init__
392    # via object.__setattr__.  Otherwise, just use a simple
393    # assignment.
394    #
395    # self_name is what "self" is called in this function: don't
396    # hard-code "self", since that might be a field name.
397    if frozen:
398        return f'__builtins__.object.__setattr__({self_name},{name!r},{value})'
399    return f'{self_name}.{name}={value}'
400
401
def _field_init(f, frozen, globals, self_name):
    # Return the source text of the line in the body of __init__ that
    # initializes field f, or None if no such line is needed.  As a
    # side effect, the field's default (or default factory) is stored
    # in globals under the name _dflt_<fieldname>.

    default_name = f'_dflt_{f.name}'
    if f.default_factory is not MISSING:
        # Either way the generated code needs access to the factory.
        globals[default_name] = f.default_factory
        if f.init:
            # The field is an __init__ parameter: use the argument if
            # one was given, otherwise call the factory.
            value = (f'{default_name}() '
                     f'if {f.name} is _HAS_DEFAULT_FACTORY '
                     f'else {f.name}')
        else:
            # The field is not an __init__ parameter, so calling the
            # factory here is the only way to initialize it.
            #
            # A field with default=defaultvalue can rely on the class
            # dict (cls.fieldname=defaultvalue), but that does not
            # work for a factory: it must be called in __init__ and
            # the result assigned to self.fieldname.  Falling back to
            # the class dict's value is impossible, both because it's
            # not set, and because it might be different per-class
            # (which, after all, is why we have a factory function!).
            value = f'{default_name}()'
    elif f.init:
        # No default factory.  The value is simply the parameter; if
        # there is a default, make it available for the signature.
        if f.default is not MISSING:
            globals[default_name] = f.default
        value = f.name
    else:
        # This field does not need initialization.  Signify that to
        # the caller by returning None.
        return None

    # The InitVar test is deliberately done this late, so that the
    # variables for the default have already been created above.
    # InitVars get no assignment statement, which is signalled by
    # returning None.
    if f._field_type is _FIELD_INITVAR:
        return None

    # Finally, generate the actual assignment.
    return _field_assign(frozen, f.name, value, self_name)
454
455
def _init_param(f):
    # Return the text of the __init__ parameter for field f.  This is
    # the equivalent of 'x:int=3', except that 'int' is replaced by a
    # variable holding the type and '3' by a variable holding the
    # default value.
    if f.default is not MISSING:
        # A plain default: refer to the variable it is stored under.
        default = f'=_dflt_{f.name}'
    elif f.default_factory is not MISSING:
        # A factory function: use the marker as the default.
        default = '=_HAS_DEFAULT_FACTORY'
    else:
        # Neither a default nor a factory: just the name and type.
        default = ''
    return f'{f.name}:_type_{f.name}{default}'
473
474
def _init_fn(fields, frozen, has_post_init, self_name):
    # Build and return the __init__ function.  fields contains both
    # real fields and InitVar pseudo-fields.

    # A field without a default must not come after a field that has
    # one.  Exec-ing the generated source would catch this as well,
    # but checking here gives a better error message, and
    # future-proofs us in case the function is ever built up using
    # ast.
    default_seen = False
    for f in fields:
        # Fields that are not __init__ parameters are irrelevant here.
        if not f.init:
            continue
        if f.default is not MISSING or f.default_factory is not MISSING:
            default_seen = True
        elif default_seen:
            raise TypeError(f'non-default argument {f.name!r} '
                            'follows default argument')

    globals = {'MISSING': MISSING,
               '_HAS_DEFAULT_FACTORY': _HAS_DEFAULT_FACTORY}

    # _field_init returns None for a field that requires no
    # initialization (a pseudo-field); those are simply dropped.
    candidate_lines = [_field_init(f, frozen, globals, self_name)
                       for f in fields]
    body_lines = [line for line in candidate_lines if line]

    # If the class has a post-init function, call it last, passing
    # along every InitVar.
    if has_post_init:
        params_str = ','.join(f.name for f in fields
                              if f._field_type is _FIELD_INITVAR)
        body_lines.append(f'{self_name}.{_POST_INIT_NAME}({params_str})')

    # An empty body is not valid syntax: fall back to 'pass'.
    if not body_lines:
        body_lines = ['pass']

    # The parameter annotations refer to these per-field variables.
    locals = {f'_type_{f.name}': f.type for f in fields}

    params = [self_name]
    params.extend(_init_param(f) for f in fields if f.init)
    return _create_fn('__init__',
                      params,
                      body_lines,
                      locals=locals,
                      globals=globals,
                      return_type=None)
521
522
def _repr_fn(fields):
    # Build the __repr__ function.  The generated body is one return
    # statement: the class's qualified name followed by an f-string
    # listing "name=value" for each field in fields.
    shown = ', '.join(f"{f.name}={{self.{f.name}!r}}" for f in fields)
    body = ['return self.__class__.__qualname__ + f"(' + shown + ')"']
    fn = _create_fn('__repr__', ('self',), body)
    # Guard against infinite recursion for self-referencing objects.
    return _recursive_repr(fn)
531
532
def _frozen_get_del_attr(cls, fields):
    # Return a (__setattr__, __delattr__) pair for a frozen class.
    # Each generated function raises FrozenInstanceError when the
    # instance's type is exactly cls or when the attribute is one of
    # the given fields; otherwise it defers to the next class via
    # super().
    #
    # XXX: globals is modified on the first call to _create_fn, then
    # the modified version is used in the second call.  Is this okay?
    globals = {'cls': cls,
               'FrozenInstanceError': FrozenInstanceError}

    if fields:
        quoted_names = ','.join(repr(f.name) for f in fields)
        fields_str = f'({quoted_names},)'
    else:
        # Special case for the zero-length tuple.
        fields_str = '()'

    # Both functions start with the same test.
    guard = f'if type(self) is cls or name in {fields_str}:'

    setattr_fn = _create_fn(
        '__setattr__',
        ('self', 'name', 'value'),
        (guard,
         ' raise FrozenInstanceError(f"cannot assign to field {name!r}")',
         'super(cls, self).__setattr__(name, value)'),
        globals=globals)
    delattr_fn = _create_fn(
        '__delattr__',
        ('self', 'name'),
        (guard,
         ' raise FrozenInstanceError(f"cannot delete field {name!r}")',
         'super(cls, self).__delattr__(name)'),
        globals=globals)
    return (setattr_fn, delattr_fn)
556
557
def _cmp_fn(name, op, self_tuple, other_tuple):
    # Build the comparison function `name`, which applies operator
    # `op` to two tuples of field values.  For fields named 'x' and
    # 'y', self_tuple is the string '(self.x,self.y)' and other_tuple
    # the string '(other.x,other.y)'.
    #
    # The generated function only compares objects of exactly the
    # same class; for anything else it returns NotImplemented.
    body = ['if other.__class__ is self.__class__:',
            f' return {self_tuple}{op}{other_tuple}',
            'return NotImplemented']
    return _create_fn(name, ('self', 'other'), body)
569
570
def _hash_fn(fields):
    # Build the __hash__ function: it hashes the tuple made of the
    # values of the given fields.
    body = [f'return hash({_tuple_str("self", fields)})']
    return _create_fn('__hash__', ('self',), body)
576
577
578def _is_classvar(a_type, typing):
579    # This test uses a typing internal class, but it's the best way to
580    # test if this is a ClassVar.
581    return (a_type is typing.ClassVar
582            or (type(a_type) is typing._GenericAlias
583                and a_type.__origin__ is typing.ClassVar))
584
585
586def _is_initvar(a_type, dataclasses):
587    # The module we're checking against is the module we're
588    # currently in (dataclasses.py).
589    return a_type is dataclasses.InitVar
590
591
def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
    # Given a type annotation string, does it refer to a_type in
    # a_module?  For example, when checking that annotation denotes a
    # ClassVar, then a_module is typing, and a_type is
    # typing.ClassVar.
    #
    # - annotation is a string type annotation
    # - cls is the class that this annotation was found in
    # - a_module is the module we want to match
    # - a_type is the type in that module we want to match
    # - is_type_predicate is a function called with (obj, a_module)
    #   that determines if obj is of the desired type.
    #
    # Returns True on a match and False otherwise, including when the
    # annotation cannot be resolved at all (for instance because the
    # module of cls is not registered in sys.modules).
    #
    # It's possible to look up a_module given a_type, but it involves
    # looking in sys.modules (again!), and seems like a waste since
    # the caller already knows a_module.

    # Since this test does not do a local namespace lookup (and
    # instead only a module (global) lookup), there are some things it
    # gets wrong.

    # With string annotations, cv0 will be detected as a ClassVar:
    #   CV = ClassVar
    #   @dataclass
    #   class C0:
    #     cv0: CV

    # But in this example cv1 will not be detected as a ClassVar:
    #   @dataclass
    #   class C1:
    #     CV = ClassVar
    #     cv1: CV

    # In C1, the code in this function (_is_type) will look up "CV" in
    # the module and not find it, so it will not consider cv1 as a
    # ClassVar.  This is a fairly obscure corner case, and the best
    # way to fix it would be to eval() the string "CV" with the
    # correct global and local namespaces.  However that would involve
    # a eval() penalty for every single field of every dataclass
    # that's defined.  It was judged not worth it.

    match = _MODULE_IDENTIFIER_RE.match(annotation)
    if not match:
        return False
    module_name, type_name = match.group(1), match.group(2)

    # The module cls claims to live in.  It can legitimately be absent
    # from sys.modules (e.g. a class built with a hand-set __module__),
    # in which case nothing can be looked up and the answer is False
    # rather than an AttributeError on None.
    cls_module = sys.modules.get(cls.__module__)

    ns = None
    if not module_name:
        # No module name, assume the class's module did
        # "from dataclasses import InitVar".
        if cls_module is not None:
            ns = cls_module.__dict__
    elif cls_module and cls_module.__dict__.get(module_name) is a_module:
        # module_name, as seen from the class's module, really is
        # a_module: resolve the type name in the module defining a_type.
        type_module = sys.modules.get(a_type.__module__)
        if type_module is not None:
            ns = type_module.__dict__

    if ns and is_type_predicate(ns.get(type_name), a_module):
        return True
    return False
649
650
def _get_field(cls, a_name, a_type):
    # Return a Field object for this field name and type.  ClassVars
    # and InitVars are also returned, but marked as such (see
    # f._field_type).

    # The class attribute, if any, is the default.  When it is already
    # a Field it is used directly; anything else is only a normal
    # default value and gets wrapped in a Field().
    default = getattr(cls, a_name, MISSING)
    if not isinstance(default, Field):
        if isinstance(default, types.MemberDescriptorType):
            # This is a field in __slots__, so it has no default value.
            default = MISSING
        f = field(default=default)
    else:
        f = default

    # Only at this point do we know the name and the type.  Set them.
    f.name = a_name
    f.type = a_type

    # Assume it's a normal field until proven otherwise.  The checks
    # below decide whether it is a ClassVar or an InitVar instead;
    # everything else is just a normal field.
    f._field_type = _FIELD

    # Besides actual types, string annotations are checked as well.
    # get_type_hints() won't always work for us (see
    # https://github.com/python/typing/issues/508 for example), plus
    # it's expensive and would require an eval for every string
    # annotation.  So, make a best effort to see if this is a ClassVar
    # or InitVar using regex's and checking that the thing referenced
    # is actually of the correct type.

    # For the complete discussion, see https://bugs.python.org/issue33453

    # If typing has not been imported, then it's impossible for any
    # annotation to be a ClassVar.  So, only look for ClassVar if
    # typing has been imported by any module (not necessarily cls's
    # module).
    typing = sys.modules.get('typing')
    if typing and (_is_classvar(a_type, typing)
                   or (isinstance(f.type, str)
                       and _is_type(f.type, cls, typing, typing.ClassVar,
                                    _is_classvar))):
        f._field_type = _FIELD_CLASSVAR

    # If the type is InitVar, or if it's a matching string annotation,
    # then it's an InitVar.  Not needed if it is already a ClassVar.
    if f._field_type is _FIELD:
        # The module we're checking against is the module we're
        # currently in (dataclasses.py).
        dataclasses = sys.modules[__name__]
        if (_is_initvar(a_type, dataclasses)
            or (isinstance(f.type, str)
                and _is_type(f.type, cls, dataclasses, dataclasses.InitVar,
                             _is_initvar))):
            f._field_type = _FIELD_INITVAR

    # Validations for individual fields.  This is delayed until now,
    # instead of in the Field() constructor, since only here do we
    # know the field name, which allows for better error reporting.

    # Special restriction for ClassVar and InitVar: no default factory.
    # Should I check for other field settings? default_factory
    # seems the most serious to check for.  Maybe add others.  For
    # example, how about init=False (or really,
    # init=<not-the-default-init-value>)?  It makes no sense for
    # ClassVar and InitVar to specify init=<anything>.
    if (f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR)
            and f.default_factory is not MISSING):
        raise TypeError(f'field {f.name} cannot have a '
                        'default factory')

    # For real fields, disallow mutable defaults for known types.
    if f._field_type is _FIELD and isinstance(f.default, (list, dict, set)):
        raise ValueError(f'mutable default {type(f.default)} for field '
                         f'{f.name} is not allowed: use default_factory')

    return f
731
732
733def _set_new_attribute(cls, name, value):
734    # Never overwrites an existing attribute.  Returns True if the
735    # attribute already exists.
736    if name in cls.__dict__:
737        return True
738    setattr(cls, name, value)
739    return False
740
741
742# Decide if/how we're going to create a hash function.  Key is
743# (unsafe_hash, eq, frozen, does-hash-exist).  Value is the action to
744# take.  The common case is to do nothing, so instead of providing a
745# function that is a no-op, use None to signify that.
746
747def _hash_set_none(cls, fields):
748    return None
749
def _hash_add(cls, fields):
    # Build a __hash__ function from the fields that take part in
    # hashing.  A field's hash setting decides; when it is None, the
    # field's compare setting is used instead.
    hashed = []
    for f in fields:
        include = f.compare if f.hash is None else f.hash
        if include:
            hashed.append(f)
    return _hash_fn(hashed)
753
754def _hash_exception(cls, fields):
755    # Raise an exception.
756    raise TypeError(f'Cannot overwrite attribute __hash__ '
757                    f'in class {cls.__name__}')
758
# Lookup table for the __hash__ decision.  The key is the tuple
# (unsafe_hash, eq, frozen, has-explicit-hash); the value is the
# function implementing the action, or None when nothing is to be
# done.
#
#                +-------------------------------------- unsafe_hash?
#                |      +------------------------------- eq?
#                |      |      +------------------------ frozen?
#                |      |      |      +----------------  has-explicit-hash?
#                |      |      |      |
#                |      |      |      |        +-------  action
#                |      |      |      |        |
#                v      v      v      v        v
_hash_action = {(False, False, False, False): None,
                (False, False, False, True ): None,
                (False, False, True,  False): None,
                (False, False, True,  True ): None,
                (False, True,  False, False): _hash_set_none,
                (False, True,  False, True ): None,
                (False, True,  True,  False): _hash_add,
                (False, True,  True,  True ): None,
                (True,  False, False, False): _hash_add,
                (True,  False, False, True ): _hash_exception,
                (True,  False, True,  False): _hash_add,
                (True,  False, True,  True ): _hash_exception,
                (True,  True,  False, False): _hash_add,
                (True,  True,  False, True ): _hash_exception,
                (True,  True,  True,  False): _hash_add,
                (True,  True,  True,  True ): _hash_exception,
                }
# See https://bugs.python.org/issue32929#msg312829 for an if-statement
# version of this table.
787
788
def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen):
    """Add the dataclass machinery to 'cls' in place and return it.

    The arguments after 'cls' are the flags given to the @dataclass
    decorator.  Fields are gathered from every base class that carries
    a _FIELDS attribute and from the annotations found in cls's own
    __dict__, and are then stored on the class under _FIELDS.
    Depending on the flags, __init__, __repr__, __eq__, the ordering
    methods, the frozen attribute hooks and __hash__ are set on the
    class.  A class doc-string is generated if there is none.

    Raises TypeError if a Field is assigned to a name that has no
    annotation, if frozen and non-frozen dataclasses are mixed through
    inheritance, or if _set_new_attribute reports that an ordering or
    frozen method cannot be added.  The selected hash action may also
    raise TypeError.  Raises ValueError if order is true but eq is
    false.
    """
    # Now that dicts retain insertion order, there's no reason to use
    # an ordered dict.  I am leveraging that ordering here, because
    # derived class fields overwrite base class fields, but the order
    # is defined by the base class, which is found first.
    fields = {}

    setattr(cls, _PARAMS, _DataclassParams(init, repr, eq, order,
                                           unsafe_hash, frozen))

    # Find our base classes in reverse MRO order, and exclude
    # ourselves.  In reversed order so that more derived classes
    # override earlier field definitions in base classes.  As long as
    # we're iterating over them, see if any are frozen.
    any_frozen_base = False
    has_dataclass_bases = False
    for b in cls.__mro__[-1:0:-1]:
        # Only process classes that have been processed by our
        # decorator.  That is, they have a _FIELDS attribute.
        base_fields = getattr(b, _FIELDS, None)
        # Compare against None rather than testing truthiness: a
        # dataclass base that declares no fields has an empty (falsy)
        # _FIELDS dict, but it must still take part in the frozen
        # checks below.
        if base_fields is not None:
            has_dataclass_bases = True
            for f in base_fields.values():
                fields[f.name] = f
            if getattr(b, _PARAMS).frozen:
                any_frozen_base = True

    # Annotations that are defined in this class (not in base
    # classes).  If __annotations__ isn't present, then this class
    # adds no new annotations.  We use this to compute fields that are
    # added by this class.
    #
    # Fields are found from cls_annotations, which is guaranteed to be
    # ordered.  Default values are from class attributes, if a field
    # has a default.  If the default value is a Field(), then it
    # contains additional info beyond (and possibly including) the
    # actual default value.  Pseudo-fields ClassVars and InitVars are
    # included, despite the fact that they're not real fields.  That's
    # dealt with later.
    cls_annotations = cls.__dict__.get('__annotations__', {})

    # Now find fields in our class.  While doing so, validate some
    # things, and set the default values (as class attributes) where
    # we can.
    cls_fields = [_get_field(cls, name, type)
                  for name, type in cls_annotations.items()]
    for f in cls_fields:
        fields[f.name] = f

        # If the class attribute (which is the default value for this
        # field) exists and is of type 'Field', replace it with the
        # real default.  This is so that normal class introspection
        # sees a real default value, not a Field.
        if isinstance(getattr(cls, f.name, None), Field):
            if f.default is MISSING:
                # If there's no default, delete the class attribute.
                # This happens if we specify field(repr=False), for
                # example (that is, we specified a field object, but
                # no default value).  Also if we're using a default
                # factory.  The class attribute should not be set at
                # all in the post-processed class.
                delattr(cls, f.name)
            else:
                setattr(cls, f.name, f.default)

    # Do we have any Field members that don't also have annotations?
    for name, value in cls.__dict__.items():
        if isinstance(value, Field) and name not in cls_annotations:
            raise TypeError(f'{name!r} is a field but has no type annotation')

    # Check rules that apply if we are derived from any dataclasses.
    if has_dataclass_bases:
        # Raise an exception if any of our bases are frozen, but we're not.
        if any_frozen_base and not frozen:
            raise TypeError('cannot inherit non-frozen dataclass from a '
                            'frozen one')

        # Raise an exception if we're frozen, but none of our bases are.
        if not any_frozen_base and frozen:
            raise TypeError('cannot inherit frozen dataclass from a '
                            'non-frozen one')

    # Remember all of the fields on our class (including bases).  This
    # also marks this class as being a dataclass.
    setattr(cls, _FIELDS, fields)

    # Was this class defined with an explicit __hash__?  Note that if
    # __eq__ is defined in this class, then python will automatically
    # set __hash__ to None.  This is a heuristic, as it's possible
    # that such a __hash__ == None was not auto-generated, but it's
    # close enough.
    class_hash = cls.__dict__.get('__hash__', MISSING)
    has_explicit_hash = not (class_hash is MISSING or
                             (class_hash is None and '__eq__' in cls.__dict__))

    # If we're generating ordering methods, we must be generating the
    # eq methods.
    if order and not eq:
        raise ValueError('eq must be true if order is true')

    if init:
        # Does this class have a post-init function?
        has_post_init = hasattr(cls, _POST_INIT_NAME)

        # Include InitVars and regular fields (so, not ClassVars).
        flds = [f for f in fields.values()
                if f._field_type in (_FIELD, _FIELD_INITVAR)]
        _set_new_attribute(cls, '__init__',
                           _init_fn(flds,
                                    frozen,
                                    has_post_init,
                                    # The name to use for the "self"
                                    # param in __init__.  Use "self"
                                    # if possible.
                                    '__dataclass_self__' if 'self' in fields
                                            else 'self',
                          ))

    # Get the fields as a list, and include only real fields.  This is
    # used in all of the following methods.
    field_list = [f for f in fields.values() if f._field_type is _FIELD]

    if repr:
        flds = [f for f in field_list if f.repr]
        _set_new_attribute(cls, '__repr__', _repr_fn(flds))

    if eq:
        # Create __eq__ method.  There's no need for a __ne__ method,
        # since python will call __eq__ and negate it.
        flds = [f for f in field_list if f.compare]
        self_tuple = _tuple_str('self', flds)
        other_tuple = _tuple_str('other', flds)
        _set_new_attribute(cls, '__eq__',
                           _cmp_fn('__eq__', '==',
                                   self_tuple, other_tuple))

    if order:
        # Create and set the ordering methods.
        flds = [f for f in field_list if f.compare]
        self_tuple = _tuple_str('self', flds)
        other_tuple = _tuple_str('other', flds)
        for name, op in [('__lt__', '<'),
                         ('__le__', '<='),
                         ('__gt__', '>'),
                         ('__ge__', '>='),
                         ]:
            if _set_new_attribute(cls, name,
                                  _cmp_fn(name, op, self_tuple, other_tuple)):
                raise TypeError(f'Cannot overwrite attribute {name} '
                                f'in class {cls.__name__}. Consider using '
                                'functools.total_ordering')

    if frozen:
        for fn in _frozen_get_del_attr(cls, field_list):
            if _set_new_attribute(cls, fn.__name__, fn):
                raise TypeError(f'Cannot overwrite attribute {fn.__name__} '
                                f'in class {cls.__name__}')

    # Decide if/how we're going to create a hash function.
    hash_action = _hash_action[bool(unsafe_hash),
                               bool(eq),
                               bool(frozen),
                               has_explicit_hash]
    if hash_action:
        # No need to call _set_new_attribute here, since by the time
        # we're here the overwriting is unconditional.
        cls.__hash__ = hash_action(cls, field_list)

    if not getattr(cls, '__doc__'):
        # Create a class doc-string.
        cls.__doc__ = (cls.__name__ +
                       str(inspect.signature(cls)).replace(' -> None', ''))

    return cls
963
964
965# _cls should never be specified by keyword, so start it with an
966# underscore.  The presence of _cls is used to detect if this
967# decorator is being called with parameters or not.
def dataclass(_cls=None, *, init=True, repr=True, eq=True, order=False,
              unsafe_hash=False, frozen=False):
    """Class decorator that adds dunder methods based on the class's fields.

    Fields are determined from the PEP 526 __annotations__ of the
    class.  The same class that was passed in is returned.

    init adds an __init__() method, repr adds a __repr__() method,
    order adds the rich comparison dunder methods, and unsafe_hash
    adds a __hash__() method.  With frozen, fields may not be assigned
    to after instance creation.

    Usable both as @dataclass and as @dataclass(...); in the second
    form a decorator is returned instead of a class.
    """

    def wrap(cls):
        return _process_class(cls, init, repr, eq, order, unsafe_hash, frozen)

    # No class means we were called with parens, so hand back the
    # decorator; otherwise we were used bare and decorate immediately.
    return wrap if _cls is None else wrap(_cls)
992
993
def fields(class_or_instance):
    """Return a tuple describing the fields of this dataclass.

    Accepts a dataclass or an instance of one. Tuple elements are of
    type Field.

    Raises TypeError if the argument has no _FIELDS attribute.
    """

    # Might it be worth caching this, per class?
    try:
        all_fields = getattr(class_or_instance, _FIELDS)
    except AttributeError:
        # The AttributeError is an implementation detail; 'from None'
        # keeps it out of the traceback the caller sees.
        raise TypeError('must be called with a dataclass type or '
                        'instance') from None

    # Exclude pseudo-fields.  Note that the dict is in insertion
    # order, so the order of the tuple is as the fields were defined.
    return tuple(f for f in all_fields.values() if f._field_type is _FIELD)
1010
1011
def _is_dataclass_instance(obj):
    """Returns True if obj is an instance of a dataclass."""
    # Look the marker up on type(obj), not on obj itself.  An object
    # whose __getattr__ answers for every name would otherwise be
    # mistaken for a dataclass instance.  For a class object,
    # type(obj) is its metaclass, so dataclass types themselves are
    # still rejected.
    return hasattr(type(obj), _FIELDS)
1015
1016
def is_dataclass(obj):
    """Returns True if obj is a dataclass or an instance of a
    dataclass."""
    # Always test the class, never the instance: an instance whose
    # __getattr__ never raises AttributeError would otherwise make
    # hasattr() report True for any object.
    cls = obj if isinstance(obj, type) else type(obj)
    return hasattr(cls, _FIELDS)
1021
1022
def asdict(obj, *, dict_factory=dict):
    """Convert a dataclass instance into a dictionary of its fields.

    The result maps each field name to its value and is built with
    'dict_factory' (the built-in dict unless another is given).
    Field values that are themselves dataclass instances are
    converted recursively, as are the contents of built-in
    containers: tuples, lists, and dicts.

    Example usage:

      @dataclass
      class C:
          x: int
          y: int

      c = C(1, 2)
      assert asdict(c) == {'x': 1, 'y': 2}

    Raises TypeError if 'obj' is not a dataclass instance.
    """
    if _is_dataclass_instance(obj):
        return _asdict_inner(obj, dict_factory)
    raise TypeError("asdict() should be called on dataclass instances")
1045
1046
def _asdict_inner(obj, dict_factory):
    """Recursive worker for asdict().

    Dataclass instances become dict_factory([(name, value), ...]).
    Namedtuples, lists, tuples and dicts are rebuilt as the same type
    with their contents converted.  Anything else is deep-copied.
    """
    if _is_dataclass_instance(obj):
        result = []
        for f in fields(obj):
            value = _asdict_inner(getattr(obj, f.name), dict_factory)
            result.append((f.name, value))
        return dict_factory(result)
    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
        # obj is a namedtuple.  Recurse into it, but the returned
        # object is another namedtuple of the same type.  This is
        # similar to how other list- or tuple-derived classes are
        # treated (see below), but we just need to create them
        # differently because a namedtuple's __init__ needs to be
        # called differently (see bpo-34363).

        # I'm not using namedtuple's _asdict()
        # method, because:
        # - it does not recurse in to the namedtuple fields and
        #   convert them to dicts (using dict_factory).
        # - I don't actually want to return a dict here.  The main
        #   use case here is json.dumps, and it handles converting
        #   namedtuples to lists.  Admittedly we're losing some
        #   information here when we produce a json list instead of a
        #   dict.  Note that if we returned dicts here instead of
        #   namedtuples, we could no longer call asdict() on a data
        #   structure where a namedtuple was used as a dict key.

        return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj])
    elif isinstance(obj, (list, tuple)):
        # Assume we can create an object of this type by passing in a
        # generator (which is not true for namedtuples, handled
        # above).
        return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
    elif isinstance(obj, dict):
        obj_type = type(obj)
        if hasattr(obj_type, 'default_factory'):
            # obj is a defaultdict, whose constructor takes the
            # default factory as its first argument, so it cannot be
            # built from an iterable of pairs alone.  Create it empty
            # and fill it in.
            result = obj_type(getattr(obj, 'default_factory'))
            for k, v in obj.items():
                result[_asdict_inner(k, dict_factory)] = \
                    _asdict_inner(v, dict_factory)
            return result
        return obj_type((_asdict_inner(k, dict_factory),
                         _asdict_inner(v, dict_factory))
                        for k, v in obj.items())
    else:
        return copy.deepcopy(obj)
1086
1087
def astuple(obj, *, tuple_factory=tuple):
    """Convert a dataclass instance into a tuple of its field values.

    The result is built with 'tuple_factory' (the built-in tuple
    unless another is given).  Field values that are themselves
    dataclass instances are converted recursively, as are the
    contents of built-in containers: tuples, lists, and dicts.

    Example usage::

      @dataclass
      class C:
          x: int
          y: int

      c = C(1, 2)
      assert astuple(c) == (1, 2)

    Raises TypeError if 'obj' is not a dataclass instance.
    """
    if _is_dataclass_instance(obj):
        return _astuple_inner(obj, tuple_factory)
    raise TypeError("astuple() should be called on dataclass instances")
1110
1111
def _astuple_inner(obj, tuple_factory):
    """Recursive worker for astuple().

    Dataclass instances become tuple_factory([value, ...]).
    Namedtuples, lists, tuples and dicts are rebuilt as the same type
    with their contents converted.  Anything else is deep-copied.
    """
    if _is_dataclass_instance(obj):
        return tuple_factory([_astuple_inner(getattr(obj, f.name),
                                             tuple_factory)
                              for f in fields(obj)])
    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
        # obj is a namedtuple.  Recurse into it, but the returned
        # object is another namedtuple of the same type.  This is
        # similar to how other list- or tuple-derived classes are
        # treated (see below), but we just need to create them
        # differently because a namedtuple's __init__ needs to be
        # called differently (see bpo-34363).
        return type(obj)(*[_astuple_inner(v, tuple_factory) for v in obj])
    elif isinstance(obj, (list, tuple)):
        # Assume we can create an object of this type by passing in a
        # generator (which is not true for namedtuples, handled
        # above).
        return type(obj)(_astuple_inner(v, tuple_factory) for v in obj)
    elif isinstance(obj, dict):
        obj_type = type(obj)
        if hasattr(obj_type, 'default_factory'):
            # obj is a defaultdict, whose constructor takes the
            # default factory as its first argument, so it cannot be
            # built from an iterable of pairs alone.  Create it empty
            # and fill it in.
            result = obj_type(getattr(obj, 'default_factory'))
            for k, v in obj.items():
                result[_astuple_inner(k, tuple_factory)] = \
                    _astuple_inner(v, tuple_factory)
            return result
        return obj_type((_astuple_inner(k, tuple_factory),
                         _astuple_inner(v, tuple_factory))
                        for k, v in obj.items())
    else:
        return copy.deepcopy(obj)
1137
1138
def make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True,
                   repr=True, eq=True, order=False, unsafe_hash=False,
                   frozen=False):
    """Return a new dynamically created dataclass.

    The dataclass name will be 'cls_name'.  'fields' is an iterable
    of either (name), (name, type) or (name, type, Field) objects. If type is
    omitted, use the string 'typing.Any'.  Field objects are created by
    the equivalent of calling 'field(name, type [, Field-info])'.

      C = make_dataclass('C', ['x', ('y', int), ('z', int, field(init=False))], bases=(Base,))

    is equivalent to:

      @dataclass
      class C(Base):
          x: 'typing.Any'
          y: int
          z: int = field(init=False)

    For the bases and namespace parameters, see the builtin type() function.

    The parameters init, repr, eq, order, unsafe_hash, and frozen are passed to
    dataclass().

    Raises TypeError if an item of 'fields' is not a string or a
    sequence of length 2 or 3, or if a field name is not a valid
    identifier, is a keyword, or is duplicated.
    """

    if namespace is None:
        namespace = {}
    else:
        # Copy namespace since we're going to mutate it.
        namespace = namespace.copy()

    # While we're looking through the field names, validate that they
    # are identifiers, are not keywords, and not duplicates.
    seen = set()
    anns = {}
    for item in fields:
        if isinstance(item, str):
            name = item
            tp = 'typing.Any'
        elif len(item) == 2:
            name, tp, = item
        elif len(item) == 3:
            name, tp, spec = item
            namespace[name] = spec
        else:
            raise TypeError(f'Invalid field: {item!r}')

        if not isinstance(name, str) or not name.isidentifier():
            raise TypeError(f'Field names must be valid identifiers: {name!r}')
        if keyword.iskeyword(name):
            raise TypeError(f'Field names must not be keywords: {name!r}')
        if name in seen:
            raise TypeError(f'Field name duplicated: {name!r}')

        seen.add(name)
        anns[name] = tp

    namespace['__annotations__'] = anns
    # We use `types.new_class()` instead of simply `type()` to allow dynamic creation
    # of generic dataclasses.
    cls = types.new_class(cls_name, bases, {}, lambda ns: ns.update(namespace))
    return dataclass(cls, init=init, repr=repr, eq=eq, order=order,
                     unsafe_hash=unsafe_hash, frozen=frozen)
1203
1204
def replace(obj, **changes):
    """Build a new object of obj's class with some field values swapped.

    Every init field that is not named in 'changes' keeps the value it
    has on 'obj'.  This is especially useful for frozen classes.
    Example usage:

      @dataclass(frozen=True)
      class C:
          x: int
          y: int

      c = C(1, 2)
      c1 = replace(c, x=3)
      assert c1.x == 3 and c1.y == 2

    Raises TypeError if 'obj' is not a dataclass instance.  Raises
    ValueError if 'changes' names a field declared with init=False, or
    if an InitVar is left out of 'changes'.
    """

    # Mutating 'changes' below is safe: it is always a fresh dict,
    # even when the caller wrote 'replace(obj, **my_changes)'.

    if not _is_dataclass_instance(obj):
        raise TypeError("replace() should be called on dataclass instances")

    for fld in getattr(obj, _FIELDS).values():
        # ClassVars are not passed to __init__; skip them entirely.
        if fld._field_type is _FIELD_CLASSVAR:
            continue

        name = fld.name
        supplied = name in changes

        if not fld.init:
            # init=False fields may not be given a new value.
            if supplied:
                raise ValueError(f'field {name} is declared with '
                                 'init=False, it cannot be specified with '
                                 'replace()')
        elif not supplied:
            # Nothing was supplied for this init field.  An InitVar
            # must be given explicitly; a regular field carries its
            # current value over from obj.
            if fld._field_type is _FIELD_INITVAR:
                raise ValueError(f"InitVar {name!r} "
                                 'must be specified with replace()')
            changes[name] = getattr(obj, name)

    # Constructing the object runs __init__() and __post_init__() (if
    # defined) with the init fields gathered in 'changes'.  Any key
    # that is not a field makes the constructor raise TypeError.
    return obj.__class__(**changes)
1254