• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef Py_BUILD_CORE_BUILTIN
2 #  define Py_BUILD_CORE_MODULE 1
3 #endif
4 
5 #include "Python.h"
6 #include "pycore_critical_section.h"  // _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED()
7 #include "pycore_long.h"          // _PyLong_GetOne()
8 #include "pycore_pyerrors.h"      // _PyErr_ChainExceptions1()
9 
10 #include "datetime.h"             // PyDateTime_TZInfo
11 
12 #include <stddef.h>               // offsetof()
13 #include <stdint.h>
14 
15 #include "clinic/_zoneinfo.c.h"
16 /*[clinic input]
17 module zoneinfo
18 class zoneinfo.ZoneInfo "PyObject *" "PyTypeObject *"
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=d12c73c0eef36df8]*/
21 
22 
23 typedef struct TransitionRuleType TransitionRuleType;
24 typedef struct StrongCacheNode StrongCacheNode;
25 
/* One local-time type record. build_ttinfo() stores owned references in the
 * three object fields; xdecref_ttinfo() releases them. */
26 typedef struct {
27     PyObject *utcoff;     // timedelta for the UTC offset (from load_timedelta)
28     PyObject *dstoff;     // timedelta for the DST offset (from load_timedelta)
29     PyObject *tzname;     // abbreviation object handed to build_ttinfo()
30     long utcoff_seconds;  // the UTC offset as a plain number of seconds
31 } _ttinfo;
32 
/* Rule applied after the last explicit transition (see the `tzrule_after`
 * member of PyZoneInfo_ZoneInfo); populated by build_tzrule() and released
 * by free_tzrule(). */
33 typedef struct {
34     _ttinfo std;                // ttinfo used while standard time applies
35     _ttinfo dst;                // ttinfo used while DST applies
36     int dst_diff;               // NOTE(review): presumably dst - std offset in seconds; confirm in build_tzrule()
37     TransitionRuleType *start;  // NOTE(review): presumably the rule marking the start of DST; confirm
38     TransitionRuleType *end;    // NOTE(review): presumably the rule marking the end of DST; confirm
39     unsigned char std_only;     // NOTE(review): presumably non-zero when the rule has no DST part; confirm
40 } _tzrule;
41 
/* Instance layout of a ZoneInfo object. The heap arrays are allocated by
 * load_data() and freed in zoneinfo_dealloc(). */
42 typedef struct {
43     PyDateTime_TZInfo base;
44     PyObject *key;              // key the zone was built with; Py_None selects the from_file() repr
45     PyObject *file_repr;        // repr() of the file object given to from_file(), else NULL
46     PyObject *weakreflist;      // cleared with PyObject_ClearWeakRefs() on dealloc
47     size_t num_transitions;     // number of entries in the transition arrays
48     size_t num_ttinfos;         // number of entries in _ttinfos
49     int64_t *trans_list_utc;    // transition timestamps, compared against UTC timestamps in fromutc()
50     int64_t *trans_list_wall[2];
51     _ttinfo **trans_ttinfos;  // References to the ttinfo for each transition
52     _ttinfo *ttinfo_before;     // ttinfo used for timestamps before the first transition
53     _tzrule tzrule_after;       // rule used for timestamps after the last transition
54     _ttinfo *_ttinfos;  // Unique array of ttinfos for ease of deallocation
55     unsigned char fixed_offset; // NOTE(review): not used in the visible code; confirm meaning
56     unsigned char source;       // one of SOURCE_NOCACHE / SOURCE_CACHE / SOURCE_FILE
57 } PyZoneInfo_ZoneInfo;
58 
/* Common header for transition rules: CalendarRule and DayRule embed it as
 * their first member. */
59 struct TransitionRuleType {
60     int64_t (*year_to_timestamp)(TransitionRuleType *, int);  // NOTE(review): presumably maps a year to the rule's timestamp; confirm
61 };
62 
/* Transition rule described by month, week and weekday plus a time of day.
 * NOTE(review): presumably the "Mm.w.d" form of a POSIX TZ string; confirm
 * against parse_transition_rule(). */
63 typedef struct {
64     TransitionRuleType base;
65     uint8_t month;      /* 1 - 12 */
66     uint8_t week;       /* 1 - 5 */
67     uint8_t day;        /* 0 - 6 */
68     int16_t hour;       /* -167 - 167, RFC 8536 §3.3.1 */
69     int8_t minute;      /* signed 2 digits */
70     int8_t second;      /* signed 2 digits */
71 } CalendarRule;
72 
/* Transition rule described by a day number within the year plus a time of
 * day. NOTE(review): `julian` presumably selects between the "Jn" and "n"
 * day-numbering forms of a POSIX TZ string; confirm. */
73 typedef struct {
74     TransitionRuleType base;
75     uint8_t julian;     /* 0, 1 */
76     uint16_t day;       /* 0 - 365 */
77     int16_t hour;       /* -167 - 167, RFC 8536 §3.3.1 */
78     int8_t minute;      /* signed 2 digits */
79     int8_t second;      /* signed 2 digits */
80 } DayRule;
81 
/* Entry of the strong cache (see ZONEINFO_STRONG_CACHE in zoneinfo_state);
 * entries are chained through `next` and `prev`. */
82 struct StrongCacheNode {
83     StrongCacheNode *next;
84     StrongCacheNode *prev;
85     PyObject *key;   // cache key
86     PyObject *zone;  // cached zone object
87 };
88 
/* Per-module state, retrieved with PyModule_GetState() by the
 * zoneinfo_get_state*() helpers. */
89 typedef struct {
90     PyTypeObject *ZoneInfoType;  // the base ZoneInfo type; compared against in get_weak_cache()
91 
92     // Imports
93     PyObject *io_open;              // callable invoked as io_open(path, "rb")
94     PyObject *_tzpath_find_tzfile;  // callable invoked with a key; returns a path or None
95     PyObject *_common_mod;          // object providing load_tzdata() and load_data()
96 
97     // Caches
98     PyObject *TIMEDELTA_CACHE;               // dict: int seconds -> timedelta (see load_timedelta)
99     PyObject *ZONEINFO_WEAK_CACHE;           // weak cache used for the base ZoneInfo type
100     StrongCacheNode *ZONEINFO_STRONG_CACHE;  // strong cache entries
101 
102     _ttinfo NO_TTINFO;  // NOTE(review): not used in the visible code; confirm purpose
103 } zoneinfo_state;
104 
105 // Constants
// NOTE(review): 719163 matches date(1970, 1, 1).toordinal(); confirm at use sites.
106 static const int EPOCHORDINAL = 719163;
// Indexed by month number 1-12; slot 0 is a -1 placeholder. February is
// listed with 28 days.
107 static int DAYS_IN_MONTH[] = {
108     -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
109 };
110 
// Running totals of DAYS_IN_MONTH: days preceding each month, same indexing.
111 static int DAYS_BEFORE_MONTH[] = {
112     -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
113 };
114 
// Values of PyZoneInfo_ZoneInfo.source, recording which constructor made the
// object: no_cache(), the cached constructor, or from_file().
115 static const int SOURCE_NOCACHE = 0;
116 static const int SOURCE_CACHE = 1;
117 static const int SOURCE_FILE = 2;
118 
// NOTE(review): presumably the maximum number of strong cache entries; confirm
// in update_strong_cache().
119 static const size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
120 
121 // Forward declarations
122 static int
123 load_data(zoneinfo_state *state, PyZoneInfo_ZoneInfo *self,
124           PyObject *file_obj);
125 static void
126 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
127                  unsigned char *isdsts, size_t num_transitions,
128                  size_t num_ttinfos);
129 static int
130 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
131             int64_t *trans_local[2], size_t num_ttinfos,
132             size_t num_transitions);
133 
134 static int
135 parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out);
136 
137 static int
138 parse_abbr(const char **p, PyObject **abbr);
139 static int
140 parse_tz_delta(const char **p, long *total_seconds);
141 static int
142 parse_transition_time(const char **p, int *hour, int *minute, int *second);
143 static int
144 parse_transition_rule(const char **p, TransitionRuleType **out);
145 
146 static _ttinfo *
147 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
148 static _ttinfo *
149 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
150                            unsigned char *fold);
151 
152 static int
153 build_ttinfo(zoneinfo_state *state, long utcoffset, long dstoffset,
154              PyObject *tzname, _ttinfo *out);
155 static void
156 xdecref_ttinfo(_ttinfo *ttinfo);
157 static int
158 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
159 
160 static int
161 build_tzrule(zoneinfo_state *state, PyObject *std_abbr, PyObject *dst_abbr,
162              long std_offset, long dst_offset, TransitionRuleType *start,
163              TransitionRuleType *end, _tzrule *out);
164 static void
165 free_tzrule(_tzrule *tzrule);
166 
167 static PyObject *
168 load_timedelta(zoneinfo_state *state, long seconds);
169 
170 static int
171 get_local_timestamp(PyObject *dt, int64_t *local_ts);
172 static _ttinfo *
173 find_ttinfo(zoneinfo_state *state, PyZoneInfo_ZoneInfo *self, PyObject *dt);
174 
175 static int
176 ymd_to_ord(int y, int m, int d);
177 static int
178 is_leap_year(int year);
179 
180 static size_t
181 _bisect(const int64_t value, const int64_t *arr, size_t size);
182 
183 static int
184 eject_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
185                         PyObject *key);
186 static void
187 clear_strong_cache(zoneinfo_state *state, const PyTypeObject *const type);
188 static void
189 update_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
190                     PyObject *key, PyObject *zone);
191 static PyObject *
192 zone_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
193                        PyObject *const key);
194 
195 static inline zoneinfo_state *
zoneinfo_get_state(PyObject * mod)196 zoneinfo_get_state(PyObject *mod)
197 {
198     zoneinfo_state *state = (zoneinfo_state *)PyModule_GetState(mod);
199     assert(state != NULL);
200     return state;
201 }
202 
203 static inline zoneinfo_state *
zoneinfo_get_state_by_cls(PyTypeObject * cls)204 zoneinfo_get_state_by_cls(PyTypeObject *cls)
205 {
206     zoneinfo_state *state = (zoneinfo_state *)_PyType_GetModuleState(cls);
207     assert(state != NULL);
208     return state;
209 }
210 
211 static struct PyModuleDef zoneinfomodule;
212 
213 static inline zoneinfo_state *
zoneinfo_get_state_by_self(PyTypeObject * self)214 zoneinfo_get_state_by_self(PyTypeObject *self)
215 {
216     PyObject *mod = PyType_GetModuleByDef(self, &zoneinfomodule);
217     assert(mod != NULL);
218     return zoneinfo_get_state(mod);
219 }
220 
221 static PyObject *
zoneinfo_new_instance(zoneinfo_state * state,PyTypeObject * type,PyObject * key)222 zoneinfo_new_instance(zoneinfo_state *state, PyTypeObject *type, PyObject *key)
223 {
224     PyObject *file_obj = NULL;
225     PyObject *file_path = NULL;
226 
227     file_path = PyObject_CallFunctionObjArgs(state->_tzpath_find_tzfile,
228                                              key, NULL);
229     if (file_path == NULL) {
230         return NULL;
231     }
232     else if (file_path == Py_None) {
233         PyObject *meth = state->_common_mod;
234         file_obj = PyObject_CallMethod(meth, "load_tzdata", "O", key);
235         if (file_obj == NULL) {
236             Py_DECREF(file_path);
237             return NULL;
238         }
239     }
240 
241     PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
242     if (self == NULL) {
243         goto error;
244     }
245 
246     if (file_obj == NULL) {
247         PyObject *func = state->io_open;
248         file_obj = PyObject_CallFunction(func, "Os", file_path, "rb");
249         if (file_obj == NULL) {
250             goto error;
251         }
252     }
253 
254     if (load_data(state, (PyZoneInfo_ZoneInfo *)self, file_obj)) {
255         goto error;
256     }
257 
258     PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
259     Py_SETREF(file_obj, NULL);
260     if (rv == NULL) {
261         goto error;
262     }
263     Py_DECREF(rv);
264 
265     ((PyZoneInfo_ZoneInfo *)self)->key = Py_NewRef(key);
266 
267     goto cleanup;
268 error:
269     Py_CLEAR(self);
270 cleanup:
271     if (file_obj != NULL) {
272         PyObject *exc = PyErr_GetRaisedException();
273         PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
274         _PyErr_ChainExceptions1(exc);
275         if (tmp == NULL) {
276             Py_CLEAR(self);
277         }
278         Py_XDECREF(tmp);
279         Py_DECREF(file_obj);
280     }
281     Py_DECREF(file_path);
282     return self;
283 }
284 
285 static PyObject *
get_weak_cache(zoneinfo_state * state,PyTypeObject * type)286 get_weak_cache(zoneinfo_state *state, PyTypeObject *type)
287 {
288     if (type == state->ZoneInfoType) {
289         return state->ZONEINFO_WEAK_CACHE;
290     }
291     else {
292         PyObject *cache =
293             PyObject_GetAttrString((PyObject *)type, "_weak_cache");
294         // We are assuming that the type lives at least as long as the function
295         // that calls get_weak_cache, and that it holds a reference to the
296         // cache, so we'll return a "borrowed reference".
297         Py_XDECREF(cache);
298         return cache;
299     }
300 }
301 
302 /*[clinic input]
303 @critical_section
304 @classmethod
305 zoneinfo.ZoneInfo.__new__
306 
307     key: object
308 
309 Create a new ZoneInfo instance.
310 [clinic start generated code]*/
311 
312 static PyObject *
zoneinfo_ZoneInfo_impl(PyTypeObject * type,PyObject * key)313 zoneinfo_ZoneInfo_impl(PyTypeObject *type, PyObject *key)
314 /*[clinic end generated code: output=95e61dab86bb95c3 input=ef73d7a83bf8790e]*/
315 {
316     zoneinfo_state *state = zoneinfo_get_state_by_self(type);
317     PyObject *instance = zone_from_strong_cache(state, type, key);
318     if (instance != NULL || PyErr_Occurred()) {
319         return instance;
320     }
321 
322     PyObject *weak_cache = get_weak_cache(state, type);
323     instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
324     if (instance == NULL) {
325         return NULL;
326     }
327 
328     if (instance == Py_None) {
329         Py_DECREF(instance);
330         PyObject *tmp = zoneinfo_new_instance(state, type, key);
331         if (tmp == NULL) {
332             return NULL;
333         }
334 
335         instance =
336             PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
337         Py_DECREF(tmp);
338         if (instance == NULL) {
339             return NULL;
340         }
341         ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
342     }
343 
344     update_strong_cache(state, type, key, instance);
345     return instance;
346 }
347 
348 static int
zoneinfo_traverse(PyZoneInfo_ZoneInfo * self,visitproc visit,void * arg)349 zoneinfo_traverse(PyZoneInfo_ZoneInfo *self, visitproc visit, void *arg)
350 {
351     Py_VISIT(Py_TYPE(self));
352     Py_VISIT(self->key);
353     return 0;
354 }
355 
356 static int
zoneinfo_clear(PyZoneInfo_ZoneInfo * self)357 zoneinfo_clear(PyZoneInfo_ZoneInfo *self)
358 {
359     Py_CLEAR(self->key);
360     Py_CLEAR(self->file_repr);
361     return 0;
362 }
363 
364 static void
zoneinfo_dealloc(PyObject * obj_self)365 zoneinfo_dealloc(PyObject *obj_self)
366 {
367     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
368     PyTypeObject *tp = Py_TYPE(self);
369     PyObject_GC_UnTrack(self);
370 
371     if (self->weakreflist != NULL) {
372         PyObject_ClearWeakRefs(obj_self);
373     }
374 
375     if (self->trans_list_utc != NULL) {
376         PyMem_Free(self->trans_list_utc);
377     }
378 
379     for (size_t i = 0; i < 2; i++) {
380         if (self->trans_list_wall[i] != NULL) {
381             PyMem_Free(self->trans_list_wall[i]);
382         }
383     }
384 
385     if (self->_ttinfos != NULL) {
386         for (size_t i = 0; i < self->num_ttinfos; ++i) {
387             xdecref_ttinfo(&(self->_ttinfos[i]));
388         }
389         PyMem_Free(self->_ttinfos);
390     }
391 
392     if (self->trans_ttinfos != NULL) {
393         PyMem_Free(self->trans_ttinfos);
394     }
395 
396     free_tzrule(&(self->tzrule_after));
397 
398     zoneinfo_clear(self);
399     tp->tp_free(obj_self);
400     Py_DECREF(tp);
401 }
402 
403 /*[clinic input]
404 @classmethod
405 zoneinfo.ZoneInfo.from_file
406 
407     cls: defining_class
408     file_obj: object
409     /
410     key: object = None
411 
412 Create a ZoneInfo file from a file object.
413 [clinic start generated code]*/
414 
415 static PyObject *
zoneinfo_ZoneInfo_from_file_impl(PyTypeObject * type,PyTypeObject * cls,PyObject * file_obj,PyObject * key)416 zoneinfo_ZoneInfo_from_file_impl(PyTypeObject *type, PyTypeObject *cls,
417                                  PyObject *file_obj, PyObject *key)
418 /*[clinic end generated code: output=77887d1d56a48324 input=d26111f29eed6863]*/
419 {
420     PyObject *file_repr = NULL;
421     PyZoneInfo_ZoneInfo *self = NULL;
422 
423     PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
424     self = (PyZoneInfo_ZoneInfo *)obj_self;
425     if (self == NULL) {
426         return NULL;
427     }
428 
429     file_repr = PyObject_Repr(file_obj);
430     if (file_repr == NULL) {
431         goto error;
432     }
433 
434     zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
435     if (load_data(state, self, file_obj)) {
436         goto error;
437     }
438 
439     self->source = SOURCE_FILE;
440     self->file_repr = file_repr;
441     self->key = Py_NewRef(key);
442     return obj_self;
443 
444 error:
445     Py_XDECREF(file_repr);
446     Py_XDECREF(self);
447     return NULL;
448 }
449 
450 /*[clinic input]
451 @classmethod
452 zoneinfo.ZoneInfo.no_cache
453 
454     cls: defining_class
455     /
456     key: object
457 
458 Get a new instance of ZoneInfo, bypassing the cache.
459 [clinic start generated code]*/
460 
461 static PyObject *
zoneinfo_ZoneInfo_no_cache_impl(PyTypeObject * type,PyTypeObject * cls,PyObject * key)462 zoneinfo_ZoneInfo_no_cache_impl(PyTypeObject *type, PyTypeObject *cls,
463                                 PyObject *key)
464 /*[clinic end generated code: output=b0b09b3344c171b7 input=0238f3d56b1ea3f1]*/
465 {
466     zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
467     PyObject *out = zoneinfo_new_instance(state, type, key);
468     if (out != NULL) {
469         ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
470     }
471 
472     return out;
473 }
474 
475 /*[clinic input]
476 @critical_section
477 @classmethod
478 zoneinfo.ZoneInfo.clear_cache
479 
480     cls: defining_class
481     /
482     *
483     only_keys: object = None
484 
485 Clear the ZoneInfo cache.
486 [clinic start generated code]*/
487 
488 static PyObject *
zoneinfo_ZoneInfo_clear_cache_impl(PyTypeObject * type,PyTypeObject * cls,PyObject * only_keys)489 zoneinfo_ZoneInfo_clear_cache_impl(PyTypeObject *type, PyTypeObject *cls,
490                                    PyObject *only_keys)
491 /*[clinic end generated code: output=114d9b7c8a22e660 input=35944715df26d24e]*/
492 {
493     zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
494     PyObject *weak_cache = get_weak_cache(state, type);
495 
496     if (only_keys == NULL || only_keys == Py_None) {
497         PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
498         if (rv != NULL) {
499             Py_DECREF(rv);
500         }
501 
502         clear_strong_cache(state, type);
503     }
504     else {
505         PyObject *item = NULL;
506         PyObject *pop = PyUnicode_FromString("pop");
507         if (pop == NULL) {
508             return NULL;
509         }
510 
511         PyObject *iter = PyObject_GetIter(only_keys);
512         if (iter == NULL) {
513             Py_DECREF(pop);
514             return NULL;
515         }
516 
517         while ((item = PyIter_Next(iter))) {
518             // Remove from strong cache
519             if (eject_from_strong_cache(state, type, item) < 0) {
520                 Py_DECREF(item);
521                 break;
522             }
523 
524             // Remove from weak cache
525             PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
526                                                        Py_None, NULL);
527 
528             Py_DECREF(item);
529             if (tmp == NULL) {
530                 break;
531             }
532             Py_DECREF(tmp);
533         }
534         Py_DECREF(iter);
535         Py_DECREF(pop);
536     }
537 
538     if (PyErr_Occurred()) {
539         return NULL;
540     }
541 
542     Py_RETURN_NONE;
543 }
544 
545 /*[clinic input]
546 zoneinfo.ZoneInfo.utcoffset
547 
548     cls: defining_class
549     dt: object
550     /
551 
552 Retrieve a timedelta representing the UTC offset in a zone at the given datetime.
553 [clinic start generated code]*/
554 
555 static PyObject *
zoneinfo_ZoneInfo_utcoffset_impl(PyObject * self,PyTypeObject * cls,PyObject * dt)556 zoneinfo_ZoneInfo_utcoffset_impl(PyObject *self, PyTypeObject *cls,
557                                  PyObject *dt)
558 /*[clinic end generated code: output=b71016c319ba1f91 input=2bb6c5364938f19c]*/
559 {
560     zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
561     _ttinfo *tti = find_ttinfo(state, (PyZoneInfo_ZoneInfo *)self, dt);
562     if (tti == NULL) {
563         return NULL;
564     }
565     return Py_NewRef(tti->utcoff);
566 }
567 
568 /*[clinic input]
569 zoneinfo.ZoneInfo.dst
570 
571     cls: defining_class
572     dt: object
573     /
574 
575 Retrieve a timedelta representing the amount of DST applied in a zone at the given datetime.
576 [clinic start generated code]*/
577 
578 static PyObject *
zoneinfo_ZoneInfo_dst_impl(PyObject * self,PyTypeObject * cls,PyObject * dt)579 zoneinfo_ZoneInfo_dst_impl(PyObject *self, PyTypeObject *cls, PyObject *dt)
580 /*[clinic end generated code: output=cb6168d7723a6ae6 input=2167fb80cf8645c6]*/
581 {
582     zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
583     _ttinfo *tti = find_ttinfo(state, (PyZoneInfo_ZoneInfo *)self, dt);
584     if (tti == NULL) {
585         return NULL;
586     }
587     return Py_NewRef(tti->dstoff);
588 }
589 
590 /*[clinic input]
591 zoneinfo.ZoneInfo.tzname
592 
593     cls: defining_class
594     dt: object
595     /
596 
597 Retrieve a string containing the abbreviation for the time zone that applies in a zone at a given datetime.
598 [clinic start generated code]*/
599 
600 static PyObject *
zoneinfo_ZoneInfo_tzname_impl(PyObject * self,PyTypeObject * cls,PyObject * dt)601 zoneinfo_ZoneInfo_tzname_impl(PyObject *self, PyTypeObject *cls,
602                               PyObject *dt)
603 /*[clinic end generated code: output=3b6ae6c3053ea75a input=15a59a4f92ed1f1f]*/
604 {
605     zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
606     _ttinfo *tti = find_ttinfo(state, (PyZoneInfo_ZoneInfo *)self, dt);
607     if (tti == NULL) {
608         return NULL;
609     }
610     return Py_NewRef(tti->tzname);
611 }
612 
613 #define GET_DT_TZINFO PyDateTime_DATE_GET_TZINFO
614 
615 static PyObject *
zoneinfo_fromutc(PyObject * obj_self,PyObject * dt)616 zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
617 {
618     if (!PyDateTime_Check(dt)) {
619         PyErr_SetString(PyExc_TypeError,
620                         "fromutc: argument must be a datetime");
621         return NULL;
622     }
623     if (GET_DT_TZINFO(dt) != obj_self) {
624         PyErr_SetString(PyExc_ValueError,
625                         "fromutc: dt.tzinfo "
626                         "is not self");
627         return NULL;
628     }
629 
630     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
631 
632     int64_t timestamp;
633     if (get_local_timestamp(dt, &timestamp)) {
634         return NULL;
635     }
636     size_t num_trans = self->num_transitions;
637 
638     _ttinfo *tti = NULL;
639     unsigned char fold = 0;
640 
641     if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
642         tti = self->ttinfo_before;
643     }
644     else if (num_trans == 0 ||
645              timestamp > self->trans_list_utc[num_trans - 1]) {
646         tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
647                                          PyDateTime_GET_YEAR(dt), &fold);
648 
649         // Immediately after the last manual transition, the fold/gap is
650         // between self->trans_ttinfos[num_transitions - 1] and whatever
651         // ttinfo applies immediately after the last transition, not between
652         // the STD and DST rules in the tzrule_after, so we may need to
653         // adjust the fold value.
654         if (num_trans) {
655             _ttinfo *tti_prev = NULL;
656             if (num_trans == 1) {
657                 tti_prev = self->ttinfo_before;
658             }
659             else {
660                 tti_prev = self->trans_ttinfos[num_trans - 2];
661             }
662             int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
663             if (diff > 0 &&
664                 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
665                 fold = 1;
666             }
667         }
668     }
669     else {
670         size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
671         _ttinfo *tti_prev = NULL;
672 
673         if (idx >= 2) {
674             tti_prev = self->trans_ttinfos[idx - 2];
675             tti = self->trans_ttinfos[idx - 1];
676         }
677         else {
678             tti_prev = self->ttinfo_before;
679             tti = self->trans_ttinfos[0];
680         }
681 
682         // Detect fold
683         int64_t shift =
684             (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
685         if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
686             fold = 1;
687         }
688     }
689 
690     PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
691     if (tmp == NULL) {
692         return NULL;
693     }
694 
695     if (fold) {
696         if (PyDateTime_CheckExact(tmp)) {
697             ((PyDateTime_DateTime *)tmp)->fold = 1;
698             dt = tmp;
699         }
700         else {
701             PyObject *replace = PyObject_GetAttrString(tmp, "replace");
702             Py_DECREF(tmp);
703             if (replace == NULL) {
704                 return NULL;
705             }
706             PyObject *args = PyTuple_New(0);
707             if (args == NULL) {
708                 Py_DECREF(replace);
709                 return NULL;
710             }
711             PyObject *kwargs = PyDict_New();
712             if (kwargs == NULL) {
713                 Py_DECREF(replace);
714                 Py_DECREF(args);
715                 return NULL;
716             }
717 
718             dt = NULL;
719             if (!PyDict_SetItemString(kwargs, "fold", _PyLong_GetOne())) {
720                 dt = PyObject_Call(replace, args, kwargs);
721             }
722 
723             Py_DECREF(args);
724             Py_DECREF(kwargs);
725             Py_DECREF(replace);
726 
727             if (dt == NULL) {
728                 return NULL;
729             }
730         }
731     }
732     else {
733         dt = tmp;
734     }
735     return dt;
736 }
737 
738 static PyObject *
zoneinfo_repr(PyZoneInfo_ZoneInfo * self)739 zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
740 {
741     PyObject *rv = NULL;
742     const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
743     if (!(self->key == Py_None)) {
744         rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
745     }
746     else {
747         assert(PyUnicode_Check(self->file_repr));
748         rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
749                                   self->file_repr);
750     }
751 
752     return rv;
753 }
754 
755 static PyObject *
zoneinfo_str(PyZoneInfo_ZoneInfo * self)756 zoneinfo_str(PyZoneInfo_ZoneInfo *self)
757 {
758     if (!(self->key == Py_None)) {
759         return Py_NewRef(self->key);
760     }
761     else {
762         return zoneinfo_repr(self);
763     }
764 }
765 
766 /* Pickles the ZoneInfo object by key and source.
767  *
768  * ZoneInfo objects are pickled by reference to the TZif file that they came
769  * from, which means that the exact transitions may be different or the file
770  * may not un-pickle if the data has changed on disk in the interim.
771  *
772  * It is necessary to include a bit indicating whether or not the object
773  * was constructed from the cache, because from-cache objects will hit the
774  * unpickling process's cache, whereas no-cache objects will bypass it.
775  *
776  * Objects constructed from ZoneInfo.from_file cannot be pickled.
777  */
778 static PyObject *
zoneinfo_reduce(PyObject * obj_self,PyObject * unused)779 zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
780 {
781     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
782     if (self->source == SOURCE_FILE) {
783         // Objects constructed from files cannot be pickled.
784         PyObject *pickle_error =
785             _PyImport_GetModuleAttrString("pickle", "PicklingError");
786         if (pickle_error == NULL) {
787             return NULL;
788         }
789 
790         PyErr_Format(pickle_error,
791                      "Cannot pickle a ZoneInfo file from a file stream.");
792         Py_DECREF(pickle_error);
793         return NULL;
794     }
795 
796     unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
797     PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
798 
799     if (constructor == NULL) {
800         return NULL;
801     }
802 
803     PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
804     Py_DECREF(constructor);
805     return rv;
806 }
807 
808 /*[clinic input]
809 @classmethod
810 zoneinfo.ZoneInfo._unpickle
811 
812     cls: defining_class
813     key: object
814     from_cache: unsigned_char(bitwise=True)
815     /
816 
817 Private method used in unpickling.
818 [clinic start generated code]*/
819 
820 static PyObject *
zoneinfo_ZoneInfo__unpickle_impl(PyTypeObject * type,PyTypeObject * cls,PyObject * key,unsigned char from_cache)821 zoneinfo_ZoneInfo__unpickle_impl(PyTypeObject *type, PyTypeObject *cls,
822                                  PyObject *key, unsigned char from_cache)
823 /*[clinic end generated code: output=556712fc709deecb input=6ac8c73eed3de316]*/
824 {
825     if (from_cache) {
826         PyObject *rv;
827         Py_BEGIN_CRITICAL_SECTION(type);
828         rv = zoneinfo_ZoneInfo_impl(type, key);
829         Py_END_CRITICAL_SECTION();
830         return rv;
831     }
832     else {
833         zoneinfo_state *state = zoneinfo_get_state_by_cls(cls);
834         return zoneinfo_new_instance(state, type, key);
835     }
836 }
837 
838 /* It is relatively expensive to construct new timedelta objects, and in most
839  * cases we're looking at a relatively small number of timedeltas, such as
840  * integer number of hours, etc. We will keep a cache so that we construct
841  * a minimal number of these.
842  *
843  * Possibly this should be replaced with an LRU cache so that it's not possible
844  * for the memory usage to explode from this, but in order for this to be a
845  * serious problem, one would need to deliberately craft a malicious time zone
846  * file with many distinct offsets. As of tzdb 2019c, loading every single zone
847  * fills the cache with ~450 timedeltas for a total size of ~12kB.
848  *
849  * This returns a new reference to the timedelta.
850  */
851 static PyObject *
load_timedelta(zoneinfo_state * state,long seconds)852 load_timedelta(zoneinfo_state *state, long seconds)
853 {
854     PyObject *rv;
855     PyObject *pyoffset = PyLong_FromLong(seconds);
856     if (pyoffset == NULL) {
857         return NULL;
858     }
859     if (PyDict_GetItemRef(state->TIMEDELTA_CACHE, pyoffset, &rv) == 0) {
860         PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
861             0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
862 
863         if (tmp != NULL) {
864             PyDict_SetDefaultRef(state->TIMEDELTA_CACHE, pyoffset, tmp, &rv);
865             Py_DECREF(tmp);
866         }
867     }
868 
869     Py_DECREF(pyoffset);
870     return rv;
871 }
872 
873 /* Constructor for _ttinfo object - this starts by initializing the _ttinfo
874  * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
875  * initialized _ttinfo objects.
876  */
877 static int
build_ttinfo(zoneinfo_state * state,long utcoffset,long dstoffset,PyObject * tzname,_ttinfo * out)878 build_ttinfo(zoneinfo_state *state, long utcoffset, long dstoffset,
879              PyObject *tzname, _ttinfo *out)
880 {
881     out->utcoff = NULL;
882     out->dstoff = NULL;
883     out->tzname = NULL;
884 
885     out->utcoff_seconds = utcoffset;
886     out->utcoff = load_timedelta(state, utcoffset);
887     if (out->utcoff == NULL) {
888         return -1;
889     }
890 
891     out->dstoff = load_timedelta(state, dstoffset);
892     if (out->dstoff == NULL) {
893         return -1;
894     }
895 
896     out->tzname = Py_NewRef(tzname);
897 
898     return 0;
899 }
900 
901 /* Decrease reference count on any non-NULL members of a _ttinfo  */
902 static void
xdecref_ttinfo(_ttinfo * ttinfo)903 xdecref_ttinfo(_ttinfo *ttinfo)
904 {
905     if (ttinfo != NULL) {
906         Py_XDECREF(ttinfo->utcoff);
907         Py_XDECREF(ttinfo->dstoff);
908         Py_XDECREF(ttinfo->tzname);
909     }
910 }
911 
912 /* Equality function for _ttinfo. */
913 static int
ttinfo_eq(const _ttinfo * const tti0,const _ttinfo * const tti1)914 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
915 {
916     int rv;
917     if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
918         1) {
919         goto end;
920     }
921 
922     if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
923         1) {
924         goto end;
925     }
926 
927     if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
928         1) {
929         goto end;
930     }
931 end:
932     return rv;
933 }
934 
935 /* Given a file-like object, this populates a ZoneInfo object
936  *
937  * The current version calls into a Python function to read the data from
938  * file into Python objects, and this translates those Python objects into
939  * C values and calculates derived values (e.g. dstoff) in C.
940  *
941  * This returns 0 on success and -1 on failure.
942  *
943  * The function will never return while `self` is partially initialized —
944  * the object only needs to be freed / deallocated if this succeeds.
945  */
946 static int
load_data(zoneinfo_state * state,PyZoneInfo_ZoneInfo * self,PyObject * file_obj)947 load_data(zoneinfo_state *state, PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
948 {
949     int rv = 0;
950     PyObject *data_tuple = NULL;
951 
952     long *utcoff = NULL;
953     long *dstoff = NULL;
954     size_t *trans_idx = NULL;
955     unsigned char *isdst = NULL;
956 
957     self->trans_list_utc = NULL;
958     self->trans_list_wall[0] = NULL;
959     self->trans_list_wall[1] = NULL;
960     self->trans_ttinfos = NULL;
961     self->_ttinfos = NULL;
962     self->file_repr = NULL;
963 
964     size_t ttinfos_allocated = 0;
965 
966     data_tuple = PyObject_CallMethod(state->_common_mod, "load_data", "O",
967                                      file_obj);
968 
969     if (data_tuple == NULL) {
970         goto error;
971     }
972 
973     if (!PyTuple_CheckExact(data_tuple)) {
974         PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
975                      data_tuple);
976         goto error;
977     }
978 
979     // Unpack the data tuple
980     PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
981     if (trans_idx_list == NULL) {
982         goto error;
983     }
984 
985     PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
986     if (trans_utc == NULL) {
987         goto error;
988     }
989 
990     PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
991     if (utcoff_list == NULL) {
992         goto error;
993     }
994 
995     PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
996     if (isdst_list == NULL) {
997         goto error;
998     }
999 
1000     PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
1001     if (abbr == NULL) {
1002         goto error;
1003     }
1004 
1005     PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
1006     if (tz_str == NULL) {
1007         goto error;
1008     }
1009 
1010     // Load the relevant sizes
1011     Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
1012     if (num_transitions < 0) {
1013         goto error;
1014     }
1015 
1016     Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
1017     if (num_ttinfos < 0) {
1018         goto error;
1019     }
1020 
1021     self->num_transitions = (size_t)num_transitions;
1022     self->num_ttinfos = (size_t)num_ttinfos;
1023 
1024     // Load the transition indices and list
1025     self->trans_list_utc =
1026         PyMem_Malloc(self->num_transitions * sizeof(int64_t));
1027     if (self->trans_list_utc == NULL) {
1028         goto error;
1029     }
1030     trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
1031     if (trans_idx == NULL) {
1032         goto error;
1033     }
1034 
1035     for (size_t i = 0; i < self->num_transitions; ++i) {
1036         PyObject *num = PyTuple_GetItem(trans_utc, i);
1037         if (num == NULL) {
1038             goto error;
1039         }
1040         self->trans_list_utc[i] = PyLong_AsLongLong(num);
1041         if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
1042             goto error;
1043         }
1044 
1045         num = PyTuple_GetItem(trans_idx_list, i);
1046         if (num == NULL) {
1047             goto error;
1048         }
1049 
1050         Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
1051         if (cur_trans_idx == -1) {
1052             goto error;
1053         }
1054 
1055         trans_idx[i] = (size_t)cur_trans_idx;
1056         if (trans_idx[i] > self->num_ttinfos) {
1057             PyErr_Format(
1058                 PyExc_ValueError,
1059                 "Invalid transition index found while reading TZif: %zd",
1060                 cur_trans_idx);
1061 
1062             goto error;
1063         }
1064     }
1065 
1066     // Load UTC offsets and isdst (size num_ttinfos)
1067     utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
1068     isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
1069 
1070     if (utcoff == NULL || isdst == NULL) {
1071         goto error;
1072     }
1073     for (size_t i = 0; i < self->num_ttinfos; ++i) {
1074         PyObject *num = PyTuple_GetItem(utcoff_list, i);
1075         if (num == NULL) {
1076             goto error;
1077         }
1078 
1079         utcoff[i] = PyLong_AsLong(num);
1080         if (utcoff[i] == -1 && PyErr_Occurred()) {
1081             goto error;
1082         }
1083 
1084         num = PyTuple_GetItem(isdst_list, i);
1085         if (num == NULL) {
1086             goto error;
1087         }
1088 
1089         int isdst_with_error = PyObject_IsTrue(num);
1090         if (isdst_with_error == -1) {
1091             goto error;
1092         }
1093         else {
1094             isdst[i] = (unsigned char)isdst_with_error;
1095         }
1096     }
1097 
1098     dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
1099     if (dstoff == NULL) {
1100         goto error;
1101     }
1102 
1103     // Derive dstoff and trans_list_wall from the information we've loaded
1104     utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
1105                      self->num_ttinfos);
1106 
1107     if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
1108                     self->trans_list_wall, self->num_ttinfos,
1109                     self->num_transitions)) {
1110         goto error;
1111     }
1112 
1113     // Build _ttinfo objects from utcoff, dstoff and abbr
1114     self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
1115     if (self->_ttinfos == NULL) {
1116         goto error;
1117     }
1118     for (size_t i = 0; i < self->num_ttinfos; ++i) {
1119         PyObject *tzname = PyTuple_GetItem(abbr, i);
1120         if (tzname == NULL) {
1121             goto error;
1122         }
1123 
1124         ttinfos_allocated++;
1125         int rc = build_ttinfo(state, utcoff[i], dstoff[i], tzname,
1126                               &(self->_ttinfos[i]));
1127         if (rc) {
1128             goto error;
1129         }
1130     }
1131 
1132     // Build our mapping from transition to the ttinfo that applies
1133     self->trans_ttinfos =
1134         PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1135     if (self->trans_ttinfos == NULL) {
1136         goto error;
1137     }
1138     for (size_t i = 0; i < self->num_transitions; ++i) {
1139         size_t ttinfo_idx = trans_idx[i];
1140         assert(ttinfo_idx < self->num_ttinfos);
1141         self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1142     }
1143 
1144     // Set ttinfo_before to the first non-DST transition
1145     for (size_t i = 0; i < self->num_ttinfos; ++i) {
1146         if (!isdst[i]) {
1147             self->ttinfo_before = &(self->_ttinfos[i]);
1148             break;
1149         }
1150     }
1151 
1152     // If there are only DST ttinfos, pick the first one, if there are no
1153     // ttinfos at all, set ttinfo_before to NULL
1154     if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1155         self->ttinfo_before = &(self->_ttinfos[0]);
1156     }
1157 
1158     if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1159         if (parse_tz_str(state, tz_str, &(self->tzrule_after))) {
1160             goto error;
1161         }
1162     }
1163     else {
1164         if (!self->num_ttinfos) {
1165             PyErr_Format(PyExc_ValueError, "No time zone information found.");
1166             goto error;
1167         }
1168 
1169         size_t idx;
1170         if (!self->num_transitions) {
1171             idx = self->num_ttinfos - 1;
1172         }
1173         else {
1174             idx = trans_idx[self->num_transitions - 1];
1175         }
1176 
1177         _ttinfo *tti = &(self->_ttinfos[idx]);
1178         build_tzrule(state, tti->tzname, NULL, tti->utcoff_seconds, 0, NULL,
1179                      NULL, &(self->tzrule_after));
1180 
1181         // We've abused the build_tzrule constructor to construct an STD-only
1182         // rule mimicking whatever ttinfo we've picked up, but it's possible
1183         // that the one we've picked up is a DST zone, so we need to make sure
1184         // that the dstoff is set correctly in that case.
1185         if (PyObject_IsTrue(tti->dstoff)) {
1186             _ttinfo *tti_after = &(self->tzrule_after.std);
1187             Py_SETREF(tti_after->dstoff, Py_NewRef(tti->dstoff));
1188         }
1189     }
1190 
1191     // Determine if this is a "fixed offset" zone, meaning that the output of
1192     // the utcoffset, dst and tzname functions does not depend on the specific
1193     // datetime passed.
1194     //
1195     // We make three simplifying assumptions here:
1196     //
1197     // 1. If tzrule_after is not std_only, it has transitions that might occur
1198     //    (it is possible to construct TZ strings that specify STD and DST but
1199     //    no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1200     // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1201     //    represent different offsets.
1202     // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1203     //    fixed-offset zone with extra _ttinfos defined may appear to *not* be
1204     //    a fixed offset zone).
1205     //
1206     // Violations to these assumptions would be fairly exotic, and exotic
1207     // zones should almost certainly not be used with datetime.time (the
1208     // only thing that would be affected by this).
1209     if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1210         self->fixed_offset = 0;
1211     }
1212     else if (self->num_ttinfos == 0) {
1213         self->fixed_offset = 1;
1214     }
1215     else {
1216         int constant_offset =
1217             ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1218         if (constant_offset < 0) {
1219             goto error;
1220         }
1221         else {
1222             self->fixed_offset = constant_offset;
1223         }
1224     }
1225 
1226     goto cleanup;
1227 error:
1228     // These resources only need to be freed if we have failed, if we succeed
1229     // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1230     // method to free the relevant resources.
1231     if (self->trans_list_utc != NULL) {
1232         PyMem_Free(self->trans_list_utc);
1233         self->trans_list_utc = NULL;
1234     }
1235 
1236     for (size_t i = 0; i < 2; ++i) {
1237         if (self->trans_list_wall[i] != NULL) {
1238             PyMem_Free(self->trans_list_wall[i]);
1239             self->trans_list_wall[i] = NULL;
1240         }
1241     }
1242 
1243     if (self->_ttinfos != NULL) {
1244         for (size_t i = 0; i < ttinfos_allocated; ++i) {
1245             xdecref_ttinfo(&(self->_ttinfos[i]));
1246         }
1247         PyMem_Free(self->_ttinfos);
1248         self->_ttinfos = NULL;
1249     }
1250 
1251     if (self->trans_ttinfos != NULL) {
1252         PyMem_Free(self->trans_ttinfos);
1253         self->trans_ttinfos = NULL;
1254     }
1255 
1256     rv = -1;
1257 cleanup:
1258     Py_XDECREF(data_tuple);
1259 
1260     if (utcoff != NULL) {
1261         PyMem_Free(utcoff);
1262     }
1263 
1264     if (dstoff != NULL) {
1265         PyMem_Free(dstoff);
1266     }
1267 
1268     if (isdst != NULL) {
1269         PyMem_Free(isdst);
1270     }
1271 
1272     if (trans_idx != NULL) {
1273         PyMem_Free(trans_idx);
1274     }
1275 
1276     return rv;
1277 }
1278 
1279 /* Function to calculate the local timestamp of a transition from the year. */
1280 int64_t
calendarrule_year_to_timestamp(TransitionRuleType * base_self,int year)1281 calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1282 {
1283     CalendarRule *self = (CalendarRule *)base_self;
1284 
1285     // We want (year, month, day of month); we have year and month, but we
1286     // need to turn (week, day-of-week) into day-of-month
1287     //
1288     // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1289     // Week 5 represents the last occurrence of day `day`, so we need to know
1290     // the first weekday of the month and the number of days in the month.
1291     int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1292     uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1293     if (self->month == 2 && is_leap_year(year)) {
1294         days_in_month += 1;
1295     }
1296 
1297     // This equation seems magical, so I'll break it down:
1298     // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1299     //    + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1300     //    because this math is mod 7
1301     // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1302     //    numbers so that -1 % 7 = 6).
1303     // 3. Add 1 because month days are a 1-based index.
1304     int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1305     if (month_day < 0) {
1306         month_day += 7;
1307     }
1308     month_day += 1;
1309 
1310     // Now use a 0-based index version of `week` to calculate the w-th
1311     // occurrence of `day`
1312     month_day += ((int8_t)(self->week) - 1) * 7;
1313 
1314     // month_day will only be > days_in_month if w was 5, and `w` means "last
1315     // occurrence of `d`", so now we just check if we over-shot the end of the
1316     // month and if so knock off 1 week.
1317     if (month_day > days_in_month) {
1318         month_day -= 7;
1319     }
1320 
1321     int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1322     return ordinal * 86400 + (int64_t)self->hour * 3600 +
1323             (int64_t)self->minute * 60 + self->second;
1324 }
1325 
1326 /* Constructor for CalendarRule. */
1327 int
calendarrule_new(int month,int week,int day,int hour,int minute,int second,CalendarRule * out)1328 calendarrule_new(int month, int week, int day, int hour,
1329                  int minute, int second, CalendarRule *out)
1330 {
1331     // These bounds come from the POSIX standard, which describes an Mm.n.d
1332     // rule as:
1333     //
1334     //   The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1335     //   5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1336     //   may occur in either the fourth or the fifth week). Week 1 is the first
1337     //   week in which the d'th day occurs. Day zero is Sunday.
1338     if (month < 1 || month > 12) {
1339         PyErr_Format(PyExc_ValueError, "Month must be in [1, 12]");
1340         return -1;
1341     }
1342 
1343     if (week < 1 || week > 5) {
1344         PyErr_Format(PyExc_ValueError, "Week must be in [1, 5]");
1345         return -1;
1346     }
1347 
1348     if (day < 0 || day > 6) {
1349         PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1350         return -1;
1351     }
1352 
1353     if (hour < -167 || hour > 167) {
1354         PyErr_Format(PyExc_ValueError, "Hour must be in [0, 167]");
1355         return -1;
1356     }
1357 
1358     TransitionRuleType base = {&calendarrule_year_to_timestamp};
1359 
1360     CalendarRule new_offset = {
1361         .base = base,
1362         .month = (uint8_t)month,
1363         .week = (uint8_t)week,
1364         .day = (uint8_t)day,
1365         .hour = (int16_t)hour,
1366         .minute = (int8_t)minute,
1367         .second = (int8_t)second,
1368     };
1369 
1370     *out = new_offset;
1371     return 0;
1372 }
1373 
1374 /* Function to calculate the local timestamp of a transition from the year.
1375  *
1376  * This translates the day of the year into a local timestamp — either a
1377  * 1-based Julian day, not including leap days, or the 0-based year-day,
1378  * including leap days.
1379  * */
1380 int64_t
dayrule_year_to_timestamp(TransitionRuleType * base_self,int year)1381 dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1382 {
1383     // The function signature requires a TransitionRuleType pointer, but this
1384     // function is only applicable to DayRule* objects.
1385     DayRule *self = (DayRule *)base_self;
1386 
1387     // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1388     // to know the number of days since 1970-01-01, so we must subtract off
1389     // the equivalent of ymd_to_ord(1970, 1, 1).
1390     //
1391     // We subtract off an additional 1 day to account for January 1st (we want
1392     // the number of full days *before* the date of the transition - partial
1393     // days are accounted for in the hour, minute and second portions.
1394     int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1395 
1396     // The Julian day specification skips over February 29th in leap years,
1397     // from the POSIX standard:
1398     //
1399     //   Leap days shall not be counted. That is, in all years-including leap
1400     //   years-February 28 is day 59 and March 1 is day 60. It is impossible to
1401     //   refer explicitly to the occasional February 29.
1402     //
1403     // This is actually more useful than you'd think — if you want a rule that
1404     // always transitions on a given calendar day (other than February 29th),
1405     // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1406     // always refers to December 31st.
1407     uint16_t day = self->day;
1408     if (self->julian && day >= 59 && is_leap_year(year)) {
1409         day += 1;
1410     }
1411 
1412     return (days_before_year + day) * 86400 + (int64_t)self->hour * 3600 +
1413            (int64_t)self->minute * 60 + self->second;
1414 }
1415 
1416 /* Constructor for DayRule. */
1417 static int
dayrule_new(int julian,int day,int hour,int minute,int second,DayRule * out)1418 dayrule_new(int julian, int day, int hour, int minute,
1419             int second, DayRule *out)
1420 {
1421     // The POSIX standard specifies that Julian days must be in the range (1 <=
1422     // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1423     // be in the range (0 <= n <= 365).
1424     if (day < julian || day > 365) {
1425         PyErr_Format(PyExc_ValueError, "day must be in [%d, 365], not: %d",
1426                      julian, day);
1427         return -1;
1428     }
1429 
1430     if (hour < -167 || hour > 167) {
1431         PyErr_Format(PyExc_ValueError, "Hour must be in [0, 167]");
1432         return -1;
1433     }
1434 
1435     TransitionRuleType base = {
1436         &dayrule_year_to_timestamp,
1437     };
1438 
1439     DayRule tmp = {
1440         .base = base,
1441         .julian = (uint8_t)julian,
1442         .day = (int16_t)day,
1443         .hour = (int16_t)hour,
1444         .minute = (int8_t)minute,
1445         .second = (int8_t)second,
1446     };
1447 
1448     *out = tmp;
1449 
1450     return 0;
1451 }
1452 
1453 /* Calculate the start and end rules for a _tzrule in the given year. */
1454 static void
tzrule_transitions(_tzrule * rule,int year,int64_t * start,int64_t * end)1455 tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1456 {
1457     assert(rule->start != NULL);
1458     assert(rule->end != NULL);
1459     *start = rule->start->year_to_timestamp(rule->start, year);
1460     *end = rule->end->year_to_timestamp(rule->end, year);
1461 }
1462 
1463 /* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1464  *
1465  * This takes a local timestamp and fold for disambiguation purposes; the year
1466  * could technically be calculated from the timestamp, but given that the
1467  * callers of this function already have the year information accessible from
1468  * the datetime struct, it is taken as an additional parameter to reduce
1469  * unnecessary calculation.
1470  * */
1471 static _ttinfo *
find_tzrule_ttinfo(_tzrule * rule,int64_t ts,unsigned char fold,int year)1472 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1473 {
1474     if (rule->std_only) {
1475         return &(rule->std);
1476     }
1477 
1478     int64_t start, end;
1479     uint8_t isdst;
1480 
1481     tzrule_transitions(rule, year, &start, &end);
1482 
1483     // With fold = 0, the period (denominated in local time) with the smaller
1484     // offset starts at the end of the gap and ends at the end of the fold;
1485     // with fold = 1, it runs from the start of the gap to the beginning of the
1486     // fold.
1487     //
1488     // So in order to determine the DST boundaries we need to know both the
1489     // fold and whether DST is positive or negative (rare), and it turns out
1490     // that this boils down to fold XOR is_positive.
1491     if (fold == (rule->dst_diff >= 0)) {
1492         end -= rule->dst_diff;
1493     }
1494     else {
1495         start += rule->dst_diff;
1496     }
1497 
1498     if (start < end) {
1499         isdst = (ts >= start) && (ts < end);
1500     }
1501     else {
1502         isdst = (ts < end) || (ts >= start);
1503     }
1504 
1505     if (isdst) {
1506         return &(rule->dst);
1507     }
1508     else {
1509         return &(rule->std);
1510     }
1511 }
1512 
1513 /* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1514  *
1515  * This function can determine the _ttinfo that applies at a given epoch time,
1516  * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1517  * This is to be used in the .fromutc() function.
1518  *
1519  * The year is technically a redundant parameter, because it can be calculated
1520  * from the timestamp, but all callers of this function should have the year
1521  * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1522  * calculation.
1523  **/
1524 static _ttinfo *
find_tzrule_ttinfo_fromutc(_tzrule * rule,int64_t ts,int year,unsigned char * fold)1525 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1526                            unsigned char *fold)
1527 {
1528     if (rule->std_only) {
1529         *fold = 0;
1530         return &(rule->std);
1531     }
1532 
1533     int64_t start, end;
1534     uint8_t isdst;
1535     tzrule_transitions(rule, year, &start, &end);
1536     start -= rule->std.utcoff_seconds;
1537     end -= rule->dst.utcoff_seconds;
1538 
1539     if (start < end) {
1540         isdst = (ts >= start) && (ts < end);
1541     }
1542     else {
1543         isdst = (ts < end) || (ts >= start);
1544     }
1545 
1546     // For positive DST, the ambiguous period is one dst_diff after the end of
1547     // DST; for negative DST, the ambiguous period is one dst_diff before the
1548     // start of DST.
1549     int64_t ambig_start, ambig_end;
1550     if (rule->dst_diff > 0) {
1551         ambig_start = end;
1552         ambig_end = end + rule->dst_diff;
1553     }
1554     else {
1555         ambig_start = start;
1556         ambig_end = start - rule->dst_diff;
1557     }
1558 
1559     *fold = (ts >= ambig_start) && (ts < ambig_end);
1560 
1561     if (isdst) {
1562         return &(rule->dst);
1563     }
1564     else {
1565         return &(rule->std);
1566     }
1567 }
1568 
1569 /* Parse a TZ string in the format specified by the POSIX standard:
1570  *
1571  *  std offset[dst[offset],start[/time],end[/time]]
1572  *
1573  *  std and dst must be 3 or more characters long and must not contain a
1574  *  leading colon, embedded digits, commas, nor a plus or minus signs; The
1575  *  spaces between "std" and "offset" are only for display and are not actually
1576  *  present in the string.
1577  *
1578  *  The format of the offset is ``[+|-]hh[:mm[:ss]]``
1579  *
1580  * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1581  *
1582  * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1583  */
1584 static int
parse_tz_str(zoneinfo_state * state,PyObject * tz_str_obj,_tzrule * out)1585 parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out)
1586 {
1587     PyObject *std_abbr = NULL;
1588     PyObject *dst_abbr = NULL;
1589     TransitionRuleType *start = NULL;
1590     TransitionRuleType *end = NULL;
1591     // Initialize offsets to invalid value (> 24 hours)
1592     long std_offset = 1 << 20;
1593     long dst_offset = 1 << 20;
1594 
1595     const char *tz_str = PyBytes_AsString(tz_str_obj);
1596     if (tz_str == NULL) {
1597         return -1;
1598     }
1599     const char *p = tz_str;
1600 
1601     // Read the `std` abbreviation, which must be at least 3 characters long.
1602     if (parse_abbr(&p, &std_abbr)) {
1603         if (!PyErr_Occurred()) {
1604             PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1605         }
1606         goto error;
1607     }
1608 
1609     // Now read the STD offset, which is required
1610     if (parse_tz_delta(&p, &std_offset)) {
1611         PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1612         goto error;
1613     }
1614 
1615     // If the string ends here, there is no DST, otherwise we must parse the
1616     // DST abbreviation and start and end dates and times.
1617     if (*p == '\0') {
1618         goto complete;
1619     }
1620 
1621     if (parse_abbr(&p, &dst_abbr)) {
1622         if (!PyErr_Occurred()) {
1623             PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1624         }
1625         goto error;
1626     }
1627 
1628     if (*p == ',') {
1629         // From the POSIX standard:
1630         //
1631         // If no offset follows dst, the alternative time is assumed to be one
1632         // hour ahead of standard time.
1633         dst_offset = std_offset + 3600;
1634     }
1635     else {
1636         if (parse_tz_delta(&p, &dst_offset)) {
1637             PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1638                          tz_str_obj);
1639             goto error;
1640         }
1641     }
1642 
1643     TransitionRuleType **transitions[2] = {&start, &end};
1644     for (size_t i = 0; i < 2; ++i) {
1645         if (*p != ',') {
1646             PyErr_Format(PyExc_ValueError,
1647                          "Missing transition rules in TZ string: %R",
1648                          tz_str_obj);
1649             goto error;
1650         }
1651         p++;
1652 
1653         if (parse_transition_rule(&p, transitions[i])) {
1654             PyErr_Format(PyExc_ValueError,
1655                          "Malformed transition rule in TZ string: %R",
1656                          tz_str_obj);
1657             goto error;
1658         }
1659     }
1660 
1661     if (*p != '\0') {
1662         PyErr_Format(PyExc_ValueError,
1663                      "Extraneous characters at end of TZ string: %R",
1664                      tz_str_obj);
1665         goto error;
1666     }
1667 
1668 complete:
1669     build_tzrule(state, std_abbr, dst_abbr, std_offset, dst_offset,
1670                  start, end, out);
1671     Py_DECREF(std_abbr);
1672     Py_XDECREF(dst_abbr);
1673 
1674     return 0;
1675 error:
1676     Py_XDECREF(std_abbr);
1677     if (dst_abbr != NULL && dst_abbr != Py_None) {
1678         Py_DECREF(dst_abbr);
1679     }
1680 
1681     if (start != NULL) {
1682         PyMem_Free(start);
1683     }
1684 
1685     if (end != NULL) {
1686         PyMem_Free(end);
1687     }
1688 
1689     return -1;
1690 }
1691 
/* Read between `min` and `max` decimal digits from *p into *value.
 *
 * *p is advanced past every digit consumed (at most `max`), including when
 * the call fails. *value always receives the number formed by the digits
 * read so far. `max` must not exceed 3.
 *
 * Returns 0 on success and -1 if fewer than `min` digits were found.
 */
static int
parse_digits(const char **p, int min, int max, int *value)
{
    assert(max <= 3);

    const char *cursor = *p;
    int parsed = 0;
    int status = 0;

    int count = 0;
    while (count < max) {
        if (!Py_ISDIGIT(*cursor)) {
            // Stopping early is only an error below the required minimum.
            status = (count < min) ? -1 : 0;
            break;
        }
        parsed = parsed * 10 + (*cursor - '0');
        cursor++;
        count++;
    }

    *p = cursor;
    *value = parsed;
    return status;
}
1706 
1707 /* Parse the STD and DST abbreviations from a TZ string. */
1708 static int
parse_abbr(const char ** p,PyObject ** abbr)1709 parse_abbr(const char **p, PyObject **abbr)
1710 {
1711     const char *ptr = *p;
1712     const char *str_start;
1713     const char *str_end;
1714 
1715     if (*ptr == '<') {
1716         char buff;
1717         ptr++;
1718         str_start = ptr;
1719         while ((buff = *ptr) != '>') {
1720             // From the POSIX standard:
1721             //
1722             //   In the quoted form, the first character shall be the less-than
1723             //   ( '<' ) character and the last character shall be the
1724             //   greater-than ( '>' ) character. All characters between these
1725             //   quoting characters shall be alphanumeric characters from the
1726             //   portable character set in the current locale, the plus-sign (
1727             //   '+' ) character, or the minus-sign ( '-' ) character. The std
1728             //   and dst fields in this case shall not include the quoting
1729             //   characters.
1730             if (!Py_ISALPHA(buff) && !Py_ISDIGIT(buff) && buff != '+' &&
1731                 buff != '-') {
1732                 return -1;
1733             }
1734             ptr++;
1735         }
1736         str_end = ptr;
1737         ptr++;
1738     }
1739     else {
1740         str_start = ptr;
1741         // From the POSIX standard:
1742         //
1743         //   In the unquoted form, all characters in these fields shall be
1744         //   alphabetic characters from the portable character set in the
1745         //   current locale.
1746         while (Py_ISALPHA(*ptr)) {
1747             ptr++;
1748         }
1749         str_end = ptr;
1750         if (str_end == str_start) {
1751             return -1;
1752         }
1753     }
1754 
1755     *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
1756     if (*abbr == NULL) {
1757         return -1;
1758     }
1759 
1760     *p = ptr;
1761     return 0;
1762 }
1763 
/* Parse a UTC offset from a TZ str.
 *
 * Reads an offset with parse_transition_time() and stores it, converted to
 * seconds and with the sign flipped, in *total_seconds.
 *
 * Returns 0 on success and -1 if the time could not be parsed or the hours
 * lie outside [-24, 24]; no exception is set by this function itself.
 */
static int
parse_tz_delta(const char **p, long *total_seconds)
{
    // From the POSIX spec:
    //
    //   Indicates the value added to the local time to arrive at Coordinated
    //   Universal Time. The offset has the form:
    //
    //   hh[:mm[:ss]]
    //
    //   One or more digits may be used; the value is always interpreted as a
    //   decimal number.
    //
    // The POSIX spec says that the values for `hour` must be between 0 and 24
    // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
    // transition times may be signed and range from -167 to 167.
    int hours = 0, minutes = 0, seconds = 0;

    if (parse_transition_time(p, &hours, &minutes, &seconds) != 0) {
        return -1;
    }

    // The parser is shared with transition times and accepts a wider range
    // than is valid for an offset, so narrow it here.
    if (hours < -24 || hours > 24) {
        return -1;
    }

    // Negative numbers correspond to *positive* offsets, from the spec:
    //
    //   If preceded by a '-', the timezone shall be east of the Prime
    //   Meridian; otherwise, it shall be west (which may be indicated by
    //   an optional preceding '+' ).
    long posix_offset = (hours * 3600L) + (minutes * 60) + seconds;
    *total_seconds = -posix_offset;
    return 0;
}
1801 
1802 /* Parse the date portion of a transition rule. */
1803 static int
parse_transition_rule(const char ** p,TransitionRuleType ** out)1804 parse_transition_rule(const char **p, TransitionRuleType **out)
1805 {
1806     // The full transition rule indicates when to change back and forth between
1807     // STD and DST, and has the form:
1808     //
1809     //   date[/time],date[/time]
1810     //
1811     // This function parses an individual date[/time] section, and returns
1812     // the number of characters that contributed to the transition rule. This
1813     // does not include the ',' at the end of the first rule.
1814     //
1815     // The POSIX spec states that if *time* is not given, the default is 02:00.
1816     const char *ptr = *p;
1817     int hour = 2;
1818     int minute = 0;
1819     int second = 0;
1820 
1821     // Rules come in one of three flavors:
1822     //
1823     //   1. Jn: Julian day n, with no leap days.
1824     //   2. n: Day of year (0-based, with leap days)
1825     //   3. Mm.n.d: Specifying by month, week and day-of-week.
1826 
1827     if (*ptr == 'M') {
1828         int month, week, day;
1829         ptr++;
1830 
1831         if (parse_digits(&ptr, 1, 2, &month)) {
1832             return -1;
1833         }
1834         if (*ptr++ != '.') {
1835             return -1;
1836         }
1837         if (parse_digits(&ptr, 1, 1, &week)) {
1838             return -1;
1839         }
1840         if (*ptr++ != '.') {
1841             return -1;
1842         }
1843         if (parse_digits(&ptr, 1, 1, &day)) {
1844             return -1;
1845         }
1846 
1847         if (*ptr == '/') {
1848             ptr++;
1849             if (parse_transition_time(&ptr, &hour, &minute, &second)) {
1850                 return -1;
1851             }
1852         }
1853 
1854         CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1855         if (rv == NULL) {
1856             return -1;
1857         }
1858 
1859         if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1860             PyMem_Free(rv);
1861             return -1;
1862         }
1863 
1864         *out = (TransitionRuleType *)rv;
1865     }
1866     else {
1867         int julian = 0;
1868         int day = 0;
1869         if (*ptr == 'J') {
1870             julian = 1;
1871             ptr++;
1872         }
1873 
1874         if (parse_digits(&ptr, 1, 3, &day)) {
1875             return -1;
1876         }
1877 
1878         if (*ptr == '/') {
1879             ptr++;
1880             if (parse_transition_time(&ptr, &hour, &minute, &second)) {
1881                 return -1;
1882             }
1883         }
1884 
1885         DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1886         if (rv == NULL) {
1887             return -1;
1888         }
1889 
1890         if (dayrule_new(julian, day, hour, minute, second, rv)) {
1891             PyMem_Free(rv);
1892             return -1;
1893         }
1894         *out = (TransitionRuleType *)rv;
1895     }
1896 
1897     *p = ptr;
1898     return 0;
1899 }
1900 
/* Parse the time portion of a transition rule (e.g. following an /).
 *
 * Fields that are present in the input are written to *hour, *minute and
 * *second (negated if a leading '-' was given); fields that are absent are
 * left untouched so the caller's defaults remain in place.
 *
 * Returns 0 and advances *p on success. Returns -1 on failure, in which case
 * *p is unchanged but the output fields may have been partially written.
 */
static int
parse_transition_time(const char **p, int *hour, int *minute, int *second)
{
    // From the spec:
    //
    //   The time has the same format as offset except that no leading sign
    //   ( '-' or '+' ) is allowed.
    //
    // The format for the offset is:
    //
    //   h[h][:mm[:ss]]
    //
    // RFC 8536 also allows transition times to be signed and to range from
    // -167 to +167.
    const char *cursor = *p;
    const int negative = (*cursor == '-');

    if (negative || *cursor == '+') {
        cursor++;
    }
    const int sign = negative ? -1 : 1;

    // The hour can be 1 to 3 numeric characters
    if (parse_digits(&cursor, 1, 3, hour)) {
        return -1;
    }
    *hour *= sign;

    // Minutes and seconds are always of the format ":dd"; seconds are only
    // looked for when minutes were present.
    int *const fields[2] = {minute, second};
    for (size_t i = 0; i < 2 && *cursor == ':'; ++i) {
        cursor++;
        if (parse_digits(&cursor, 2, 2, fields[i])) {
            return -1;
        }
        *fields[i] *= sign;
    }

    *p = cursor;
    return 0;
}
1952 
1953 /* Constructor for a _tzrule.
1954  *
1955  * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1956  * case `dst_offset` will be ignored and `start` and `end` are expected to be
1957  * NULL as well.
1958  *
1959  * Returns 0 on success.
1960  */
1961 static int
build_tzrule(zoneinfo_state * state,PyObject * std_abbr,PyObject * dst_abbr,long std_offset,long dst_offset,TransitionRuleType * start,TransitionRuleType * end,_tzrule * out)1962 build_tzrule(zoneinfo_state *state, PyObject *std_abbr, PyObject *dst_abbr,
1963              long std_offset, long dst_offset, TransitionRuleType *start,
1964              TransitionRuleType *end, _tzrule *out)
1965 {
1966     _tzrule rv = {{0}};
1967 
1968     rv.start = start;
1969     rv.end = end;
1970 
1971     if (build_ttinfo(state, std_offset, 0, std_abbr, &rv.std)) {
1972         goto error;
1973     }
1974 
1975     if (dst_abbr != NULL) {
1976         rv.dst_diff = dst_offset - std_offset;
1977         if (build_ttinfo(state, dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1978             goto error;
1979         }
1980     }
1981     else {
1982         rv.std_only = 1;
1983     }
1984 
1985     *out = rv;
1986 
1987     return 0;
1988 error:
1989     xdecref_ttinfo(&rv.std);
1990     xdecref_ttinfo(&rv.dst);
1991     return -1;
1992 }
1993 
1994 /* Destructor for _tzrule. */
1995 static void
free_tzrule(_tzrule * tzrule)1996 free_tzrule(_tzrule *tzrule)
1997 {
1998     xdecref_ttinfo(&(tzrule->std));
1999     if (!tzrule->std_only) {
2000         xdecref_ttinfo(&(tzrule->dst));
2001     }
2002 
2003     if (tzrule->start != NULL) {
2004         PyMem_Free(tzrule->start);
2005     }
2006 
2007     if (tzrule->end != NULL) {
2008         PyMem_Free(tzrule->end);
2009     }
2010 }
2011 
/* Calculate DST offsets from transitions and UTC offsets
 *
 * This is necessary because each C `ttinfo` only contains the UTC offset,
 * time zone abbreviation and an isdst boolean - it does not include the
 * amount of the DST offset, but we need the amount for the dst() function.
 *
 * This function uses heuristics to infer what the offset should be, so it
 * is not guaranteed that this will work for all zones. If we cannot assign
 * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
 * bool(dt.dst()) will always match ttinfo.isdst.
 *
 * Parameters:
 *   trans_idx        ttinfo index for each transition (num_transitions items)
 *   utcoffs          UTC offset for each ttinfo (num_ttinfos items)
 *   dstoffs          output; DST offset for each ttinfo (num_ttinfos items).
 *                    Entries that are already non-zero are left alone.
 *   isdsts           isdst flag for each ttinfo (num_ttinfos items)
 */
static void
utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
                 unsigned char *isdsts, size_t num_transitions,
                 size_t num_ttinfos)
{
    // Number of ttinfos that are flagged as DST, i.e. the number of entries
    // of dstoffs that need a value.
    size_t dst_count = 0;
    size_t dst_found = 0;
    for (size_t i = 0; i < num_ttinfos; ++i) {
        if (isdsts[i]) {
            dst_count++;
        }
    }

    for (size_t i = 1; i < num_transitions; ++i) {
        // Stop as soon as every DST ttinfo has been assigned an offset.
        if (dst_count == dst_found) {
            break;
        }

        size_t idx = trans_idx[i];
        size_t comp_idx = trans_idx[i - 1];

        // Only look at DST offsets that have not been assigned already
        if (!isdsts[idx] || dstoffs[idx] != 0) {
            continue;
        }

        long dstoff = 0;
        long utcoff = utcoffs[idx];

        // First choice: compare against the preceding transition, if that
        // one is standard time.
        if (!isdsts[comp_idx]) {
            dstoff = utcoff - utcoffs[comp_idx];
        }

        // Second choice: compare against the following transition. This is
        // only possible when there *is* a following transition; without the
        // bounds check the last transition would read past the end of
        // trans_idx.
        if (!dstoff && idx < (num_ttinfos - 1) && i + 1 < num_transitions) {
            comp_idx = trans_idx[i + 1];

            // If the following transition is also DST and we couldn't find
            // the DST offset by this point, we're going to have to skip it
            // and hope this transition gets assigned later
            if (isdsts[comp_idx]) {
                continue;
            }

            dstoff = utcoff - utcoffs[comp_idx];
        }

        if (dstoff) {
            dst_found++;
            dstoffs[idx] = dstoff;
        }
    }

    if (dst_found < dst_count) {
        // If there are time zones we didn't find a value for, we'll end up
        // with dstoff = 0 for something where isdst=1. This is obviously
        // wrong — one hour will be a much better guess than 0.
        for (size_t idx = 0; idx < num_ttinfos; ++idx) {
            if (isdsts[idx] && !dstoffs[idx]) {
                dstoffs[idx] = 3600;
            }
        }
    }
}
2084 
/* Exchange the values of x and y, using `buffer` as scratch storage.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to a
 * single statement and is safe inside unbraced if/else bodies. Each argument
 * is evaluated more than once, so do not pass expressions with side effects.
 */
#define _swap(x, y, buffer) \
    do {                    \
        (buffer) = (x);     \
        (x) = (y);          \
        (y) = (buffer);     \
    } while (0)
2089 
/* Calculate transitions in local time from UTC time and offsets.
 *
 * We want to know when each transition occurs, denominated in the number of
 * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
 * *local time* (note: this is *not* equivalent to the output of
 * datetime.timestamp, which is the total number of seconds actually elapsed
 * since 1970-01-01T00:00:00Z in UTC).
 *
 * This is an ambiguous question because "local time" can be ambiguous — but it
 * is disambiguated by the `fold` parameter, so we allocate two arrays:
 *
 *  trans_local[0]: The wall-time transitions for fold=0
 *  trans_local[1]: The wall-time transitions for fold=1
 *
 * For each transition, the larger of the offsets on either side is added to
 * the fold=0 entry and the smaller one to the fold=1 entry.
 *
 * This returns 0 on success and a negative number on failure. The trans_local
 * arrays must be freed if they are not NULL. Nothing is allocated when
 * num_transitions is 0.
 */
static int
ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
            int64_t *trans_local[2], size_t num_ttinfos,
            size_t num_transitions)
{
    if (num_transitions == 0) {
        return 0;
    }

    // Start both arrays off as copies of the UTC transitions; the offsets
    // are added in place below.
    const size_t nbytes = num_transitions * sizeof(int64_t);
    for (size_t fold = 0; fold < 2; ++fold) {
        int64_t *copy = PyMem_Malloc(nbytes);
        trans_local[fold] = copy;
        if (copy == NULL) {
            return -1;
        }
        memcpy(copy, trans_utc, nbytes);
    }

    for (size_t i = 0; i < num_transitions; ++i) {
        int64_t before;
        int64_t after;

        if (i > 0) {
            before = utcoff[trans_idx[i - 1]];
            after = utcoff[trans_idx[i]];
        }
        else if (num_ttinfos > 1) {
            // The first transition is compared against ttinfo 0.
            before = utcoff[0];
            after = utcoff[trans_idx[0]];
        }
        else {
            before = utcoff[0];
            after = utcoff[0];
        }

        const int64_t larger = (after > before) ? after : before;
        const int64_t smaller = (after > before) ? before : after;

        trans_local[0][i] += larger;
        trans_local[1][i] += smaller;
    }

    return 0;
}
2157 
/* Simple bisect_right binary search implementation.
 *
 * `arr` must hold `size` elements in ascending order. Returns the index of
 * the first element that is strictly greater than `value`, or `size` when
 * there is no such element.
 */
static size_t
_bisect(const int64_t value, const int64_t *arr, size_t size)
{
    size_t lo = 0;

    for (size_t hi = size; lo < hi;) {
        const size_t mid = (lo + hi) / 2;
        if (arr[mid] <= value) {
            lo = mid + 1;
        }
        else {
            hi = mid;
        }
    }

    // The loop ends with lo == hi.
    return lo;
}
2178 
2179 /* Find the ttinfo rules that apply at a given local datetime. */
2180 static _ttinfo *
find_ttinfo(zoneinfo_state * state,PyZoneInfo_ZoneInfo * self,PyObject * dt)2181 find_ttinfo(zoneinfo_state *state, PyZoneInfo_ZoneInfo *self, PyObject *dt)
2182 {
2183     // datetime.time has a .tzinfo attribute that passes None as the dt
2184     // argument; it only really has meaning for fixed-offset zones.
2185     if (dt == Py_None) {
2186         if (self->fixed_offset) {
2187             return &(self->tzrule_after.std);
2188         }
2189         else {
2190             return &(state->NO_TTINFO);
2191         }
2192     }
2193 
2194     int64_t ts;
2195     if (get_local_timestamp(dt, &ts)) {
2196         return NULL;
2197     }
2198 
2199     unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2200     assert(fold < 2);
2201     int64_t *local_transitions = self->trans_list_wall[fold];
2202     size_t num_trans = self->num_transitions;
2203 
2204     if (num_trans && ts < local_transitions[0]) {
2205         return self->ttinfo_before;
2206     }
2207     else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2208         return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2209                                   PyDateTime_GET_YEAR(dt));
2210     }
2211     else {
2212         size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2213         assert(idx < self->num_transitions);
2214         return self->trans_ttinfos[idx];
2215     }
2216 }
2217 
/* Return 1 if `year` is a leap year under the Gregorian rules, else 0.
 *
 * The year is converted to unsigned before the modulo operations.
 */
static int
is_leap_year(int year)
{
    const unsigned int uyear = (unsigned int)year;

    if (uyear % 4 != 0) {
        return 0;
    }
    if (uyear % 100 != 0) {
        return 1;
    }
    return uyear % 400 == 0;
}
2224 
2225 /* Calculates ordinal datetime from year, month and day. */
2226 static int
ymd_to_ord(int y,int m,int d)2227 ymd_to_ord(int y, int m, int d)
2228 {
2229     y -= 1;
2230     int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2231     int yearday = DAYS_BEFORE_MONTH[m];
2232     if (m > 2 && is_leap_year(y + 1)) {
2233         yearday += 1;
2234     }
2235 
2236     return days_before_year + yearday + d;
2237 }
2238 
2239 /* Calculate the number of seconds since 1970-01-01 in local time.
2240  *
2241  * This gets a datetime in the same "units" as self->trans_list_wall so that we
2242  * can easily determine which transitions a datetime falls between. See the
2243  * comment above ts_to_local for more information.
2244  * */
2245 static int
get_local_timestamp(PyObject * dt,int64_t * local_ts)2246 get_local_timestamp(PyObject *dt, int64_t *local_ts)
2247 {
2248     assert(local_ts != NULL);
2249 
2250     int hour, minute, second;
2251     int ord;
2252     if (PyDateTime_CheckExact(dt)) {
2253         int y = PyDateTime_GET_YEAR(dt);
2254         int m = PyDateTime_GET_MONTH(dt);
2255         int d = PyDateTime_GET_DAY(dt);
2256         hour = PyDateTime_DATE_GET_HOUR(dt);
2257         minute = PyDateTime_DATE_GET_MINUTE(dt);
2258         second = PyDateTime_DATE_GET_SECOND(dt);
2259 
2260         ord = ymd_to_ord(y, m, d);
2261     }
2262     else {
2263         PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2264         if (num == NULL) {
2265             return -1;
2266         }
2267 
2268         ord = PyLong_AsLong(num);
2269         Py_DECREF(num);
2270         if (ord == -1 && PyErr_Occurred()) {
2271             return -1;
2272         }
2273 
2274         num = PyObject_GetAttrString(dt, "hour");
2275         if (num == NULL) {
2276             return -1;
2277         }
2278         hour = PyLong_AsLong(num);
2279         Py_DECREF(num);
2280         if (hour == -1) {
2281             return -1;
2282         }
2283 
2284         num = PyObject_GetAttrString(dt, "minute");
2285         if (num == NULL) {
2286             return -1;
2287         }
2288         minute = PyLong_AsLong(num);
2289         Py_DECREF(num);
2290         if (minute == -1) {
2291             return -1;
2292         }
2293 
2294         num = PyObject_GetAttrString(dt, "second");
2295         if (num == NULL) {
2296             return -1;
2297         }
2298         second = PyLong_AsLong(num);
2299         Py_DECREF(num);
2300         if (second == -1) {
2301             return -1;
2302         }
2303     }
2304 
2305     *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400L +
2306                 (int64_t)(hour * 3600L + minute * 60 + second);
2307 
2308     return 0;
2309 }
2310 
2311 /////
2312 // Functions for cache handling
2313 
2314 /* Constructor for StrongCacheNode
2315  *
2316  * This function doesn't set MemoryError if PyMem_Malloc fails,
2317  * as the cache intentionally doesn't propagate exceptions
2318  * and fails silently if error occurs.
2319  */
2320 static StrongCacheNode *
strong_cache_node_new(PyObject * key,PyObject * zone)2321 strong_cache_node_new(PyObject *key, PyObject *zone)
2322 {
2323     StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2324     if (node == NULL) {
2325         return NULL;
2326     }
2327 
2328     node->next = NULL;
2329     node->prev = NULL;
2330     node->key = Py_NewRef(key);
2331     node->zone = Py_NewRef(zone);
2332 
2333     return node;
2334 }
2335 
2336 /* Destructor for StrongCacheNode */
2337 void
strong_cache_node_free(StrongCacheNode * node)2338 strong_cache_node_free(StrongCacheNode *node)
2339 {
2340     Py_XDECREF(node->key);
2341     Py_XDECREF(node->zone);
2342 
2343     PyMem_Free(node);
2344 }
2345 
2346 /* Frees all nodes at or after a specified root in the strong cache.
2347  *
2348  * This can be used on the root node to free the entire cache or it can be used
2349  * to clear all nodes that have been expired (which, if everything is going
2350  * right, will actually only be 1 node at a time).
2351  */
2352 void
strong_cache_free(StrongCacheNode * root)2353 strong_cache_free(StrongCacheNode *root)
2354 {
2355     StrongCacheNode *node = root;
2356     StrongCacheNode *next_node;
2357     while (node != NULL) {
2358         next_node = node->next;
2359         strong_cache_node_free(node);
2360 
2361         node = next_node;
2362     }
2363 }
2364 
2365 /* Removes a node from the cache and update its neighbors.
2366  *
2367  * This is used both when ejecting a node from the cache and when moving it to
2368  * the front of the cache.
2369  */
2370 static void
remove_from_strong_cache(zoneinfo_state * state,StrongCacheNode * node)2371 remove_from_strong_cache(zoneinfo_state *state, StrongCacheNode *node)
2372 {
2373     _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType);
2374     if (state->ZONEINFO_STRONG_CACHE == node) {
2375         state->ZONEINFO_STRONG_CACHE = node->next;
2376     }
2377 
2378     if (node->prev != NULL) {
2379         node->prev->next = node->next;
2380     }
2381 
2382     if (node->next != NULL) {
2383         node->next->prev = node->prev;
2384     }
2385 
2386     node->next = NULL;
2387     node->prev = NULL;
2388 }
2389 
2390 /* Retrieves the node associated with a key, if it exists.
2391  *
2392  * This traverses the strong cache until it finds a matching key and returns a
2393  * pointer to the relevant node if found. Returns NULL if no node is found.
2394  *
2395  * root may be NULL, indicating an empty cache.
2396  */
2397 static StrongCacheNode *
find_in_strong_cache(const StrongCacheNode * const root,PyObject * const key)2398 find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2399 {
2400     const StrongCacheNode *node = root;
2401     while (node != NULL) {
2402         int rv = PyObject_RichCompareBool(key, node->key, Py_EQ);
2403         if (rv < 0) {
2404             return NULL;
2405         }
2406         if (rv) {
2407             return (StrongCacheNode *)node;
2408         }
2409 
2410         node = node->next;
2411     }
2412 
2413     return NULL;
2414 }
2415 
2416 /* Ejects a given key from the class's strong cache, if applicable.
2417  *
2418  * This function is used to enable the per-key functionality in clear_cache.
2419  */
2420 static int
eject_from_strong_cache(zoneinfo_state * state,const PyTypeObject * const type,PyObject * key)2421 eject_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
2422                         PyObject *key)
2423 {
2424     if (type != state->ZoneInfoType) {
2425         return 0;
2426     }
2427 
2428     _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType);
2429     StrongCacheNode *cache = state->ZONEINFO_STRONG_CACHE;
2430     StrongCacheNode *node = find_in_strong_cache(cache, key);
2431     if (node != NULL) {
2432         remove_from_strong_cache(state, node);
2433 
2434         strong_cache_node_free(node);
2435     }
2436     else if (PyErr_Occurred()) {
2437         return -1;
2438     }
2439     return 0;
2440 }
2441 
2442 /* Moves a node to the front of the LRU cache.
2443  *
2444  * The strong cache is an LRU cache, so whenever a given node is accessed, if
2445  * it is not at the front of the cache, it needs to be moved there.
2446  */
2447 static void
move_strong_cache_node_to_front(zoneinfo_state * state,StrongCacheNode ** root,StrongCacheNode * node)2448 move_strong_cache_node_to_front(zoneinfo_state *state, StrongCacheNode **root,
2449                                 StrongCacheNode *node)
2450 {
2451     StrongCacheNode *root_p = *root;
2452     if (root_p == node) {
2453         return;
2454     }
2455 
2456     remove_from_strong_cache(state, node);
2457 
2458     node->prev = NULL;
2459     node->next = root_p;
2460 
2461     if (root_p != NULL) {
2462         root_p->prev = node;
2463     }
2464 
2465     *root = node;
2466 }
2467 
2468 /* Retrieves a ZoneInfo from the strong cache if it's present.
2469  *
2470  * This function finds the ZoneInfo by key and if found will move the node to
2471  * the front of the LRU cache and return a new reference to it. It returns NULL
2472  * if the key is not in the cache.
2473  *
2474  * The strong cache is currently only implemented for the base class, so this
2475  * always returns a cache miss for subclasses.
2476  */
2477 static PyObject *
zone_from_strong_cache(zoneinfo_state * state,const PyTypeObject * const type,PyObject * const key)2478 zone_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
2479                        PyObject *const key)
2480 {
2481     if (type != state->ZoneInfoType) {
2482         return NULL;  // Strong cache currently only implemented for base class
2483     }
2484 
2485     _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType);
2486     StrongCacheNode *cache = state->ZONEINFO_STRONG_CACHE;
2487     StrongCacheNode *node = find_in_strong_cache(cache, key);
2488 
2489     if (node != NULL) {
2490         StrongCacheNode **root = &(state->ZONEINFO_STRONG_CACHE);
2491         move_strong_cache_node_to_front(state, root, node);
2492         return Py_NewRef(node->zone);
2493     }
2494 
2495     return NULL;  // Cache miss
2496 }
2497 
2498 /* Inserts a new key into the strong LRU cache.
2499  *
2500  * This function is only to be used after a cache miss — it creates a new node
2501  * at the front of the cache and ejects any stale entries (keeping the size of
2502  * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2503  */
2504 static void
update_strong_cache(zoneinfo_state * state,const PyTypeObject * const type,PyObject * key,PyObject * zone)2505 update_strong_cache(zoneinfo_state *state, const PyTypeObject *const type,
2506                     PyObject *key, PyObject *zone)
2507 {
2508     if (type != state->ZoneInfoType) {
2509         return;
2510     }
2511 
2512     _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType);
2513     StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2514     if (new_node == NULL) {
2515         return;
2516     }
2517     StrongCacheNode **root = &(state->ZONEINFO_STRONG_CACHE);
2518     move_strong_cache_node_to_front(state, root, new_node);
2519 
2520     StrongCacheNode *node = new_node->next;
2521     for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2522         if (node == NULL) {
2523             return;
2524         }
2525         node = node->next;
2526     }
2527 
2528     // Everything beyond this point needs to be freed
2529     if (node != NULL) {
2530         if (node->prev != NULL) {
2531             node->prev->next = NULL;
2532         }
2533         strong_cache_free(node);
2534     }
2535 }
2536 
2537 /* Clears all entries into a type's strong cache.
2538  *
2539  * Because the strong cache is not implemented for subclasses, this is a no-op
2540  * for everything except the base class.
2541  */
2542 void
clear_strong_cache(zoneinfo_state * state,const PyTypeObject * const type)2543 clear_strong_cache(zoneinfo_state *state, const PyTypeObject *const type)
2544 {
2545     if (type != state->ZoneInfoType) {
2546         return;
2547     }
2548 
2549     strong_cache_free(state->ZONEINFO_STRONG_CACHE);
2550     state->ZONEINFO_STRONG_CACHE = NULL;
2551 }
2552 
2553 static PyObject *
new_weak_cache(void)2554 new_weak_cache(void)
2555 {
2556     PyObject *WeakValueDictionary =
2557             _PyImport_GetModuleAttrString("weakref", "WeakValueDictionary");
2558     if (WeakValueDictionary == NULL) {
2559         return NULL;
2560     }
2561     PyObject *weak_cache = PyObject_CallNoArgs(WeakValueDictionary);
2562     Py_DECREF(WeakValueDictionary);
2563     return weak_cache;
2564 }
2565 
2566 // This function is not idempotent and must be called on a new module object.
2567 static int
initialize_caches(zoneinfo_state * state)2568 initialize_caches(zoneinfo_state *state)
2569 {
2570     state->TIMEDELTA_CACHE = PyDict_New();
2571     if (state->TIMEDELTA_CACHE == NULL) {
2572         return -1;
2573     }
2574 
2575     state->ZONEINFO_WEAK_CACHE = new_weak_cache();
2576     if (state->ZONEINFO_WEAK_CACHE == NULL) {
2577         return -1;
2578     }
2579 
2580     return 0;
2581 }
2582 
2583 static PyObject *
zoneinfo_init_subclass(PyTypeObject * cls,PyObject * args,PyObject ** kwargs)2584 zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2585 {
2586     PyObject *weak_cache = new_weak_cache();
2587     if (weak_cache == NULL) {
2588         return NULL;
2589     }
2590 
2591     if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache",
2592                                weak_cache) < 0) {
2593         Py_DECREF(weak_cache);
2594         return NULL;
2595     }
2596     Py_DECREF(weak_cache);
2597     Py_RETURN_NONE;
2598 }
2599 
2600 /////
2601 // Specify the ZoneInfo type
2602 static PyMethodDef zoneinfo_methods[] = {
2603     ZONEINFO_ZONEINFO_CLEAR_CACHE_METHODDEF
2604     ZONEINFO_ZONEINFO_NO_CACHE_METHODDEF
2605     ZONEINFO_ZONEINFO_FROM_FILE_METHODDEF
2606     ZONEINFO_ZONEINFO_UTCOFFSET_METHODDEF
2607     ZONEINFO_ZONEINFO_DST_METHODDEF
2608     ZONEINFO_ZONEINFO_TZNAME_METHODDEF
2609     {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2610      PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2611                "datetime in local time.")},
2612     {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2613      PyDoc_STR("Function for serialization with the pickle protocol.")},
2614     ZONEINFO_ZONEINFO__UNPICKLE_METHODDEF
2615     {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
2616      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2617      PyDoc_STR("Function to initialize subclasses.")},
2618     {NULL} /* Sentinel */
2619 };
2620 
2621 static PyMemberDef zoneinfo_members[] = {
2622     {.name = "key",
2623      .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2624      .type = Py_T_OBJECT_EX,
2625      .flags = Py_READONLY,
2626      .doc = NULL},
2627     {.name = "__weaklistoffset__",
2628      .offset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2629      .type = Py_T_PYSSIZET,
2630      .flags = Py_READONLY},
2631     {NULL}, /* Sentinel */
2632 };
2633 
2634 static PyType_Slot zoneinfo_slots[] = {
2635     {Py_tp_repr, zoneinfo_repr},
2636     {Py_tp_str, zoneinfo_str},
2637     {Py_tp_getattro, PyObject_GenericGetAttr},
2638     {Py_tp_methods, zoneinfo_methods},
2639     {Py_tp_members, zoneinfo_members},
2640     {Py_tp_new, zoneinfo_ZoneInfo},
2641     {Py_tp_dealloc, zoneinfo_dealloc},
2642     {Py_tp_traverse, zoneinfo_traverse},
2643     {Py_tp_clear, zoneinfo_clear},
2644     {0, NULL},
2645 };
2646 
2647 static PyType_Spec zoneinfo_spec = {
2648     .name = "zoneinfo.ZoneInfo",
2649     .basicsize = sizeof(PyZoneInfo_ZoneInfo),
2650     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2651               Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_IMMUTABLETYPE),
2652     .slots = zoneinfo_slots,
2653 };
2654 
2655 /////
2656 // Specify the _zoneinfo module
2657 static PyMethodDef module_methods[] = {{NULL, NULL}};
2658 
2659 static int
module_traverse(PyObject * mod,visitproc visit,void * arg)2660 module_traverse(PyObject *mod, visitproc visit, void *arg)
2661 {
2662     zoneinfo_state *state = zoneinfo_get_state(mod);
2663 
2664     Py_VISIT(state->ZoneInfoType);
2665     Py_VISIT(state->io_open);
2666     Py_VISIT(state->_tzpath_find_tzfile);
2667     Py_VISIT(state->_common_mod);
2668     Py_VISIT(state->TIMEDELTA_CACHE);
2669     Py_VISIT(state->ZONEINFO_WEAK_CACHE);
2670 
2671     StrongCacheNode *node = state->ZONEINFO_STRONG_CACHE;
2672     while (node != NULL) {
2673         StrongCacheNode *next = node->next;
2674         Py_VISIT(node->key);
2675         Py_VISIT(node->zone);
2676         node = next;
2677     }
2678 
2679     Py_VISIT(state->NO_TTINFO.utcoff);
2680     Py_VISIT(state->NO_TTINFO.dstoff);
2681     Py_VISIT(state->NO_TTINFO.tzname);
2682 
2683     return 0;
2684 }
2685 
2686 static int
module_clear(PyObject * mod)2687 module_clear(PyObject *mod)
2688 {
2689     zoneinfo_state *state = zoneinfo_get_state(mod);
2690 
2691     Py_CLEAR(state->ZoneInfoType);
2692     Py_CLEAR(state->io_open);
2693     Py_CLEAR(state->_tzpath_find_tzfile);
2694     Py_CLEAR(state->_common_mod);
2695     Py_CLEAR(state->TIMEDELTA_CACHE);
2696     Py_CLEAR(state->ZONEINFO_WEAK_CACHE);
2697     clear_strong_cache(state, state->ZoneInfoType);
2698     Py_CLEAR(state->NO_TTINFO.utcoff);
2699     Py_CLEAR(state->NO_TTINFO.dstoff);
2700     Py_CLEAR(state->NO_TTINFO.tzname);
2701 
2702     return 0;
2703 }
2704 
2705 static void
module_free(void * mod)2706 module_free(void *mod)
2707 {
2708     (void)module_clear((PyObject *)mod);
2709 }
2710 
2711 static int
zoneinfomodule_exec(PyObject * m)2712 zoneinfomodule_exec(PyObject *m)
2713 {
2714     PyDateTime_IMPORT;
2715     if (PyDateTimeAPI == NULL) {
2716         goto error;
2717     }
2718 
2719     zoneinfo_state *state = zoneinfo_get_state(m);
2720     PyObject *base = (PyObject *)PyDateTimeAPI->TZInfoType;
2721     state->ZoneInfoType = (PyTypeObject *)PyType_FromModuleAndSpec(m,
2722                                                         &zoneinfo_spec, base);
2723     if (state->ZoneInfoType == NULL) {
2724         goto error;
2725     }
2726 
2727     int rc = PyModule_AddObjectRef(m, "ZoneInfo",
2728                                    (PyObject *)state->ZoneInfoType);
2729     if (rc < 0) {
2730         goto error;
2731     }
2732 
2733     /* Populate imports */
2734     state->_tzpath_find_tzfile =
2735         _PyImport_GetModuleAttrString("zoneinfo._tzpath", "find_tzfile");
2736     if (state->_tzpath_find_tzfile == NULL) {
2737         goto error;
2738     }
2739 
2740     state->io_open = _PyImport_GetModuleAttrString("io", "open");
2741     if (state->io_open == NULL) {
2742         goto error;
2743     }
2744 
2745     state->_common_mod = PyImport_ImportModule("zoneinfo._common");
2746     if (state->_common_mod == NULL) {
2747         goto error;
2748     }
2749 
2750     if (state->NO_TTINFO.utcoff == NULL) {
2751         state->NO_TTINFO.utcoff = Py_NewRef(Py_None);
2752         state->NO_TTINFO.dstoff = Py_NewRef(Py_None);
2753         state->NO_TTINFO.tzname = Py_NewRef(Py_None);
2754     }
2755 
2756     if (initialize_caches(state)) {
2757         goto error;
2758     }
2759 
2760     return 0;
2761 
2762 error:
2763     return -1;
2764 }
2765 
2766 static PyModuleDef_Slot zoneinfomodule_slots[] = {
2767     {Py_mod_exec, zoneinfomodule_exec},
2768     {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2769     {Py_mod_gil, Py_MOD_GIL_NOT_USED},
2770     {0, NULL},
2771 };
2772 
2773 static struct PyModuleDef zoneinfomodule = {
2774     .m_base = PyModuleDef_HEAD_INIT,
2775     .m_name = "_zoneinfo",
2776     .m_doc = "C implementation of the zoneinfo module",
2777     .m_size = sizeof(zoneinfo_state),
2778     .m_methods = module_methods,
2779     .m_slots = zoneinfomodule_slots,
2780     .m_traverse = module_traverse,
2781     .m_clear = module_clear,
2782     .m_free = module_free,
2783 };
2784 
2785 PyMODINIT_FUNC
PyInit__zoneinfo(void)2786 PyInit__zoneinfo(void)
2787 {
2788     return PyModuleDef_Init(&zoneinfomodule);
2789 }
2790