• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include "pycore_long.h"          // _PyLong_GetOne()
3 #include "structmember.h"
4 
5 #include <ctype.h>
6 #include <stddef.h>
7 #include <stdint.h>
8 
9 #include "datetime.h"
10 
11 // Imports
12 static PyObject *io_open = NULL;
13 static PyObject *_tzpath_find_tzfile = NULL;
14 static PyObject *_common_mod = NULL;
15 
16 typedef struct TransitionRuleType TransitionRuleType;
17 typedef struct StrongCacheNode StrongCacheNode;
18 
19 typedef struct {
20     PyObject *utcoff;
21     PyObject *dstoff;
22     PyObject *tzname;
23     long utcoff_seconds;
24 } _ttinfo;
25 
26 typedef struct {
27     _ttinfo std;
28     _ttinfo dst;
29     int dst_diff;
30     TransitionRuleType *start;
31     TransitionRuleType *end;
32     unsigned char std_only;
33 } _tzrule;
34 
35 typedef struct {
36     PyDateTime_TZInfo base;
37     PyObject *key;
38     PyObject *file_repr;
39     PyObject *weakreflist;
40     size_t num_transitions;
41     size_t num_ttinfos;
42     int64_t *trans_list_utc;
43     int64_t *trans_list_wall[2];
44     _ttinfo **trans_ttinfos;  // References to the ttinfo for each transition
45     _ttinfo *ttinfo_before;
46     _tzrule tzrule_after;
47     _ttinfo *_ttinfos;  // Unique array of ttinfos for ease of deallocation
48     unsigned char fixed_offset;
49     unsigned char source;
50 } PyZoneInfo_ZoneInfo;
51 
52 struct TransitionRuleType {
53     int64_t (*year_to_timestamp)(TransitionRuleType *, int);
54 };
55 
56 typedef struct {
57     TransitionRuleType base;
58     uint8_t month;
59     uint8_t week;
60     uint8_t day;
61     int8_t hour;
62     int8_t minute;
63     int8_t second;
64 } CalendarRule;
65 
66 typedef struct {
67     TransitionRuleType base;
68     uint8_t julian;
69     unsigned int day;
70     int8_t hour;
71     int8_t minute;
72     int8_t second;
73 } DayRule;
74 
75 struct StrongCacheNode {
76     StrongCacheNode *next;
77     StrongCacheNode *prev;
78     PyObject *key;
79     PyObject *zone;
80 };
81 
82 static PyTypeObject PyZoneInfo_ZoneInfoType;
83 
84 // Globals
85 static PyObject *TIMEDELTA_CACHE = NULL;
86 static PyObject *ZONEINFO_WEAK_CACHE = NULL;
87 static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
88 static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
89 
90 static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
91 
92 // Constants
93 static const int EPOCHORDINAL = 719163;
94 static int DAYS_IN_MONTH[] = {
95     -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
96 };
97 
98 static int DAYS_BEFORE_MONTH[] = {
99     -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
100 };
101 
102 static const int SOURCE_NOCACHE = 0;
103 static const int SOURCE_CACHE = 1;
104 static const int SOURCE_FILE = 2;
105 
106 // Forward declarations
107 static int
108 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
109 static void
110 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
111                  unsigned char *isdsts, size_t num_transitions,
112                  size_t num_ttinfos);
113 static int
114 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
115             int64_t *trans_local[2], size_t num_ttinfos,
116             size_t num_transitions);
117 
118 static int
119 parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
120 
121 static Py_ssize_t
122 parse_abbr(const char *const p, PyObject **abbr);
123 static Py_ssize_t
124 parse_tz_delta(const char *const p, long *total_seconds);
125 static Py_ssize_t
126 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
127                       int8_t *second);
128 static Py_ssize_t
129 parse_transition_rule(const char *const p, TransitionRuleType **out);
130 
131 static _ttinfo *
132 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
133 static _ttinfo *
134 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
135                            unsigned char *fold);
136 
137 static int
138 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
139 static void
140 xdecref_ttinfo(_ttinfo *ttinfo);
141 static int
142 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
143 
144 static int
145 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
146              long dst_offset, TransitionRuleType *start,
147              TransitionRuleType *end, _tzrule *out);
148 static void
149 free_tzrule(_tzrule *tzrule);
150 
151 static PyObject *
152 load_timedelta(long seconds);
153 
154 static int
155 get_local_timestamp(PyObject *dt, int64_t *local_ts);
156 static _ttinfo *
157 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
158 
159 static int
160 ymd_to_ord(int y, int m, int d);
161 static int
162 is_leap_year(int year);
163 
164 static size_t
165 _bisect(const int64_t value, const int64_t *arr, size_t size);
166 
167 static int
168 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
169 static void
170 clear_strong_cache(const PyTypeObject *const type);
171 static void
172 update_strong_cache(const PyTypeObject *const type, PyObject *key,
173                     PyObject *zone);
174 static PyObject *
175 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key);
176 
177 static PyObject *
zoneinfo_new_instance(PyTypeObject * type,PyObject * key)178 zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
179 {
180     PyObject *file_obj = NULL;
181     PyObject *file_path = NULL;
182 
183     file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
184     if (file_path == NULL) {
185         return NULL;
186     }
187     else if (file_path == Py_None) {
188         file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
189         if (file_obj == NULL) {
190             Py_DECREF(file_path);
191             return NULL;
192         }
193     }
194 
195     PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
196     if (self == NULL) {
197         goto error;
198     }
199 
200     if (file_obj == NULL) {
201         file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
202         if (file_obj == NULL) {
203             goto error;
204         }
205     }
206 
207     if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
208         goto error;
209     }
210 
211     PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
212     Py_DECREF(file_obj);
213     file_obj = NULL;
214     if (rv == NULL) {
215         goto error;
216     }
217     Py_DECREF(rv);
218 
219     ((PyZoneInfo_ZoneInfo *)self)->key = key;
220     Py_INCREF(key);
221 
222     goto cleanup;
223 error:
224     Py_XDECREF(self);
225     self = NULL;
226 cleanup:
227     if (file_obj != NULL) {
228         PyObject *exc, *val, *tb;
229         PyErr_Fetch(&exc, &val, &tb);
230         PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
231         _PyErr_ChainExceptions(exc, val, tb);
232         if (tmp == NULL) {
233             Py_CLEAR(self);
234         }
235         Py_XDECREF(tmp);
236         Py_DECREF(file_obj);
237     }
238     Py_DECREF(file_path);
239     return self;
240 }
241 
242 static PyObject *
get_weak_cache(PyTypeObject * type)243 get_weak_cache(PyTypeObject *type)
244 {
245     if (type == &PyZoneInfo_ZoneInfoType) {
246         return ZONEINFO_WEAK_CACHE;
247     }
248     else {
249         PyObject *cache =
250             PyObject_GetAttrString((PyObject *)type, "_weak_cache");
251         // We are assuming that the type lives at least as long as the function
252         // that calls get_weak_cache, and that it holds a reference to the
253         // cache, so we'll return a "borrowed reference".
254         Py_XDECREF(cache);
255         return cache;
256     }
257 }
258 
259 static PyObject *
zoneinfo_new(PyTypeObject * type,PyObject * args,PyObject * kw)260 zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
261 {
262     PyObject *key = NULL;
263     static char *kwlist[] = {"key", NULL};
264     if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
265         return NULL;
266     }
267 
268     PyObject *instance = zone_from_strong_cache(type, key);
269     if (instance != NULL || PyErr_Occurred()) {
270         return instance;
271     }
272 
273     PyObject *weak_cache = get_weak_cache(type);
274     instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
275     if (instance == NULL) {
276         return NULL;
277     }
278 
279     if (instance == Py_None) {
280         Py_DECREF(instance);
281         PyObject *tmp = zoneinfo_new_instance(type, key);
282         if (tmp == NULL) {
283             return NULL;
284         }
285 
286         instance =
287             PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
288         Py_DECREF(tmp);
289         if (instance == NULL) {
290             return NULL;
291         }
292         ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
293     }
294 
295     update_strong_cache(type, key, instance);
296     return instance;
297 }
298 
299 static void
zoneinfo_dealloc(PyObject * obj_self)300 zoneinfo_dealloc(PyObject *obj_self)
301 {
302     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
303 
304     if (self->weakreflist != NULL) {
305         PyObject_ClearWeakRefs(obj_self);
306     }
307 
308     if (self->trans_list_utc != NULL) {
309         PyMem_Free(self->trans_list_utc);
310     }
311 
312     for (size_t i = 0; i < 2; i++) {
313         if (self->trans_list_wall[i] != NULL) {
314             PyMem_Free(self->trans_list_wall[i]);
315         }
316     }
317 
318     if (self->_ttinfos != NULL) {
319         for (size_t i = 0; i < self->num_ttinfos; ++i) {
320             xdecref_ttinfo(&(self->_ttinfos[i]));
321         }
322         PyMem_Free(self->_ttinfos);
323     }
324 
325     if (self->trans_ttinfos != NULL) {
326         PyMem_Free(self->trans_ttinfos);
327     }
328 
329     free_tzrule(&(self->tzrule_after));
330 
331     Py_XDECREF(self->key);
332     Py_XDECREF(self->file_repr);
333 
334     Py_TYPE(self)->tp_free((PyObject *)self);
335 }
336 
337 static PyObject *
zoneinfo_from_file(PyTypeObject * type,PyObject * args,PyObject * kwargs)338 zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
339 {
340     PyObject *file_obj = NULL;
341     PyObject *file_repr = NULL;
342     PyObject *key = Py_None;
343     PyZoneInfo_ZoneInfo *self = NULL;
344 
345     static char *kwlist[] = {"", "key", NULL};
346     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
347                                      &key)) {
348         return NULL;
349     }
350 
351     PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
352     self = (PyZoneInfo_ZoneInfo *)obj_self;
353     if (self == NULL) {
354         return NULL;
355     }
356 
357     file_repr = PyUnicode_FromFormat("%R", file_obj);
358     if (file_repr == NULL) {
359         goto error;
360     }
361 
362     if (load_data(self, file_obj)) {
363         goto error;
364     }
365 
366     self->source = SOURCE_FILE;
367     self->file_repr = file_repr;
368     self->key = key;
369     Py_INCREF(key);
370 
371     return obj_self;
372 error:
373     Py_XDECREF(file_repr);
374     Py_XDECREF(self);
375     return NULL;
376 }
377 
378 static PyObject *
zoneinfo_no_cache(PyTypeObject * cls,PyObject * args,PyObject * kwargs)379 zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
380 {
381     static char *kwlist[] = {"key", NULL};
382     PyObject *key = NULL;
383     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
384         return NULL;
385     }
386 
387     PyObject *out = zoneinfo_new_instance(cls, key);
388     if (out != NULL) {
389         ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
390     }
391 
392     return out;
393 }
394 
395 static PyObject *
zoneinfo_clear_cache(PyObject * cls,PyObject * args,PyObject * kwargs)396 zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
397 {
398     PyObject *only_keys = NULL;
399     static char *kwlist[] = {"only_keys", NULL};
400 
401     if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
402                                       &only_keys))) {
403         return NULL;
404     }
405 
406     PyTypeObject *type = (PyTypeObject *)cls;
407     PyObject *weak_cache = get_weak_cache(type);
408 
409     if (only_keys == NULL || only_keys == Py_None) {
410         PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
411         if (rv != NULL) {
412             Py_DECREF(rv);
413         }
414 
415         clear_strong_cache(type);
416     }
417     else {
418         PyObject *item = NULL;
419         PyObject *pop = PyUnicode_FromString("pop");
420         if (pop == NULL) {
421             return NULL;
422         }
423 
424         PyObject *iter = PyObject_GetIter(only_keys);
425         if (iter == NULL) {
426             Py_DECREF(pop);
427             return NULL;
428         }
429 
430         while ((item = PyIter_Next(iter))) {
431             // Remove from strong cache
432             if (eject_from_strong_cache(type, item) < 0) {
433                 Py_DECREF(item);
434                 break;
435             }
436 
437             // Remove from weak cache
438             PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
439                                                        Py_None, NULL);
440 
441             Py_DECREF(item);
442             if (tmp == NULL) {
443                 break;
444             }
445             Py_DECREF(tmp);
446         }
447         Py_DECREF(iter);
448         Py_DECREF(pop);
449     }
450 
451     if (PyErr_Occurred()) {
452         return NULL;
453     }
454 
455     Py_RETURN_NONE;
456 }
457 
458 static PyObject *
zoneinfo_utcoffset(PyObject * self,PyObject * dt)459 zoneinfo_utcoffset(PyObject *self, PyObject *dt)
460 {
461     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
462     if (tti == NULL) {
463         return NULL;
464     }
465     Py_INCREF(tti->utcoff);
466     return tti->utcoff;
467 }
468 
469 static PyObject *
zoneinfo_dst(PyObject * self,PyObject * dt)470 zoneinfo_dst(PyObject *self, PyObject *dt)
471 {
472     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
473     if (tti == NULL) {
474         return NULL;
475     }
476     Py_INCREF(tti->dstoff);
477     return tti->dstoff;
478 }
479 
480 static PyObject *
zoneinfo_tzname(PyObject * self,PyObject * dt)481 zoneinfo_tzname(PyObject *self, PyObject *dt)
482 {
483     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
484     if (tti == NULL) {
485         return NULL;
486     }
487     Py_INCREF(tti->tzname);
488     return tti->tzname;
489 }
490 
491 #define GET_DT_TZINFO PyDateTime_DATE_GET_TZINFO
492 
493 static PyObject *
zoneinfo_fromutc(PyObject * obj_self,PyObject * dt)494 zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
495 {
496     if (!PyDateTime_Check(dt)) {
497         PyErr_SetString(PyExc_TypeError,
498                         "fromutc: argument must be a datetime");
499         return NULL;
500     }
501     if (GET_DT_TZINFO(dt) != obj_self) {
502         PyErr_SetString(PyExc_ValueError,
503                         "fromutc: dt.tzinfo "
504                         "is not self");
505         return NULL;
506     }
507 
508     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
509 
510     int64_t timestamp;
511     if (get_local_timestamp(dt, &timestamp)) {
512         return NULL;
513     }
514     size_t num_trans = self->num_transitions;
515 
516     _ttinfo *tti = NULL;
517     unsigned char fold = 0;
518 
519     if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
520         tti = self->ttinfo_before;
521     }
522     else if (num_trans == 0 ||
523              timestamp > self->trans_list_utc[num_trans - 1]) {
524         tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
525                                          PyDateTime_GET_YEAR(dt), &fold);
526 
527         // Immediately after the last manual transition, the fold/gap is
528         // between self->trans_ttinfos[num_transitions - 1] and whatever
529         // ttinfo applies immediately after the last transition, not between
530         // the STD and DST rules in the tzrule_after, so we may need to
531         // adjust the fold value.
532         if (num_trans) {
533             _ttinfo *tti_prev = NULL;
534             if (num_trans == 1) {
535                 tti_prev = self->ttinfo_before;
536             }
537             else {
538                 tti_prev = self->trans_ttinfos[num_trans - 2];
539             }
540             int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
541             if (diff > 0 &&
542                 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
543                 fold = 1;
544             }
545         }
546     }
547     else {
548         size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
549         _ttinfo *tti_prev = NULL;
550 
551         if (idx >= 2) {
552             tti_prev = self->trans_ttinfos[idx - 2];
553             tti = self->trans_ttinfos[idx - 1];
554         }
555         else {
556             tti_prev = self->ttinfo_before;
557             tti = self->trans_ttinfos[0];
558         }
559 
560         // Detect fold
561         int64_t shift =
562             (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
563         if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
564             fold = 1;
565         }
566     }
567 
568     PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
569     if (tmp == NULL) {
570         return NULL;
571     }
572 
573     if (fold) {
574         if (PyDateTime_CheckExact(tmp)) {
575             ((PyDateTime_DateTime *)tmp)->fold = 1;
576             dt = tmp;
577         }
578         else {
579             PyObject *replace = PyObject_GetAttrString(tmp, "replace");
580             PyObject *args = PyTuple_New(0);
581             PyObject *kwargs = PyDict_New();
582 
583             Py_DECREF(tmp);
584             if (args == NULL || kwargs == NULL || replace == NULL) {
585                 Py_XDECREF(args);
586                 Py_XDECREF(kwargs);
587                 Py_XDECREF(replace);
588                 return NULL;
589             }
590 
591             dt = NULL;
592             if (!PyDict_SetItemString(kwargs, "fold", _PyLong_GetOne())) {
593                 dt = PyObject_Call(replace, args, kwargs);
594             }
595 
596             Py_DECREF(args);
597             Py_DECREF(kwargs);
598             Py_DECREF(replace);
599 
600             if (dt == NULL) {
601                 return NULL;
602             }
603         }
604     }
605     else {
606         dt = tmp;
607     }
608     return dt;
609 }
610 
611 static PyObject *
zoneinfo_repr(PyZoneInfo_ZoneInfo * self)612 zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
613 {
614     PyObject *rv = NULL;
615     const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
616     if (!(self->key == Py_None)) {
617         rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
618     }
619     else {
620         assert(PyUnicode_Check(self->file_repr));
621         rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
622                                   self->file_repr);
623     }
624 
625     return rv;
626 }
627 
628 static PyObject *
zoneinfo_str(PyZoneInfo_ZoneInfo * self)629 zoneinfo_str(PyZoneInfo_ZoneInfo *self)
630 {
631     if (!(self->key == Py_None)) {
632         Py_INCREF(self->key);
633         return self->key;
634     }
635     else {
636         return zoneinfo_repr(self);
637     }
638 }
639 
640 /* Pickles the ZoneInfo object by key and source.
641  *
642  * ZoneInfo objects are pickled by reference to the TZif file that they came
643  * from, which means that the exact transitions may be different or the file
644  * may not un-pickle if the data has changed on disk in the interim.
645  *
646  * It is necessary to include a bit indicating whether or not the object
647  * was constructed from the cache, because from-cache objects will hit the
648  * unpickling process's cache, whereas no-cache objects will bypass it.
649  *
650  * Objects constructed from ZoneInfo.from_file cannot be pickled.
651  */
652 static PyObject *
zoneinfo_reduce(PyObject * obj_self,PyObject * unused)653 zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
654 {
655     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
656     if (self->source == SOURCE_FILE) {
657         // Objects constructed from files cannot be pickled.
658         PyObject *pickle = PyImport_ImportModule("pickle");
659         if (pickle == NULL) {
660             return NULL;
661         }
662 
663         PyObject *pickle_error =
664             PyObject_GetAttrString(pickle, "PicklingError");
665         Py_DECREF(pickle);
666         if (pickle_error == NULL) {
667             return NULL;
668         }
669 
670         PyErr_Format(pickle_error,
671                      "Cannot pickle a ZoneInfo file from a file stream.");
672         Py_DECREF(pickle_error);
673         return NULL;
674     }
675 
676     unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
677     PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
678 
679     if (constructor == NULL) {
680         return NULL;
681     }
682 
683     PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
684     Py_DECREF(constructor);
685     return rv;
686 }
687 
688 static PyObject *
zoneinfo__unpickle(PyTypeObject * cls,PyObject * args)689 zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
690 {
691     PyObject *key;
692     unsigned char from_cache;
693     if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
694         return NULL;
695     }
696 
697     if (from_cache) {
698         PyObject *val_args = Py_BuildValue("(O)", key);
699         if (val_args == NULL) {
700             return NULL;
701         }
702 
703         PyObject *rv = zoneinfo_new(cls, val_args, NULL);
704 
705         Py_DECREF(val_args);
706         return rv;
707     }
708     else {
709         return zoneinfo_new_instance(cls, key);
710     }
711 }
712 
713 /* It is relatively expensive to construct new timedelta objects, and in most
714  * cases we're looking at a relatively small number of timedeltas, such as
715  * integer number of hours, etc. We will keep a cache so that we construct
716  * a minimal number of these.
717  *
718  * Possibly this should be replaced with an LRU cache so that it's not possible
719  * for the memory usage to explode from this, but in order for this to be a
720  * serious problem, one would need to deliberately craft a malicious time zone
721  * file with many distinct offsets. As of tzdb 2019c, loading every single zone
722  * fills the cache with ~450 timedeltas for a total size of ~12kB.
723  *
724  * This returns a new reference to the timedelta.
725  */
726 static PyObject *
load_timedelta(long seconds)727 load_timedelta(long seconds)
728 {
729     PyObject *rv;
730     PyObject *pyoffset = PyLong_FromLong(seconds);
731     if (pyoffset == NULL) {
732         return NULL;
733     }
734     rv = PyDict_GetItemWithError(TIMEDELTA_CACHE, pyoffset);
735     if (rv == NULL) {
736         if (PyErr_Occurred()) {
737             goto error;
738         }
739         PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
740             0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
741 
742         if (tmp == NULL) {
743             goto error;
744         }
745 
746         rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
747         Py_DECREF(tmp);
748     }
749 
750     Py_XINCREF(rv);
751     Py_DECREF(pyoffset);
752     return rv;
753 error:
754     Py_DECREF(pyoffset);
755     return NULL;
756 }
757 
758 /* Constructor for _ttinfo object - this starts by initializing the _ttinfo
759  * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
760  * initialized _ttinfo objects.
761  */
762 static int
build_ttinfo(long utcoffset,long dstoffset,PyObject * tzname,_ttinfo * out)763 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
764 {
765     out->utcoff = NULL;
766     out->dstoff = NULL;
767     out->tzname = NULL;
768 
769     out->utcoff_seconds = utcoffset;
770     out->utcoff = load_timedelta(utcoffset);
771     if (out->utcoff == NULL) {
772         return -1;
773     }
774 
775     out->dstoff = load_timedelta(dstoffset);
776     if (out->dstoff == NULL) {
777         return -1;
778     }
779 
780     out->tzname = tzname;
781     Py_INCREF(tzname);
782 
783     return 0;
784 }
785 
786 /* Decrease reference count on any non-NULL members of a _ttinfo  */
787 static void
xdecref_ttinfo(_ttinfo * ttinfo)788 xdecref_ttinfo(_ttinfo *ttinfo)
789 {
790     if (ttinfo != NULL) {
791         Py_XDECREF(ttinfo->utcoff);
792         Py_XDECREF(ttinfo->dstoff);
793         Py_XDECREF(ttinfo->tzname);
794     }
795 }
796 
797 /* Equality function for _ttinfo. */
798 static int
ttinfo_eq(const _ttinfo * const tti0,const _ttinfo * const tti1)799 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
800 {
801     int rv;
802     if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
803         1) {
804         goto end;
805     }
806 
807     if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
808         1) {
809         goto end;
810     }
811 
812     if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
813         1) {
814         goto end;
815     }
816 end:
817     return rv;
818 }
819 
820 /* Given a file-like object, this populates a ZoneInfo object
821  *
822  * The current version calls into a Python function to read the data from
823  * file into Python objects, and this translates those Python objects into
824  * C values and calculates derived values (e.g. dstoff) in C.
825  *
826  * This returns 0 on success and -1 on failure.
827  *
828  * The function will never return while `self` is partially initialized —
829  * the object only needs to be freed / deallocated if this succeeds.
830  */
831 static int
load_data(PyZoneInfo_ZoneInfo * self,PyObject * file_obj)832 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
833 {
834     PyObject *data_tuple = NULL;
835 
836     long *utcoff = NULL;
837     long *dstoff = NULL;
838     size_t *trans_idx = NULL;
839     unsigned char *isdst = NULL;
840 
841     self->trans_list_utc = NULL;
842     self->trans_list_wall[0] = NULL;
843     self->trans_list_wall[1] = NULL;
844     self->trans_ttinfos = NULL;
845     self->_ttinfos = NULL;
846     self->file_repr = NULL;
847 
848     size_t ttinfos_allocated = 0;
849 
850     data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
851 
852     if (data_tuple == NULL) {
853         goto error;
854     }
855 
856     if (!PyTuple_CheckExact(data_tuple)) {
857         PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
858                      data_tuple);
859         goto error;
860     }
861 
862     // Unpack the data tuple
863     PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
864     if (trans_idx_list == NULL) {
865         goto error;
866     }
867 
868     PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
869     if (trans_utc == NULL) {
870         goto error;
871     }
872 
873     PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
874     if (utcoff_list == NULL) {
875         goto error;
876     }
877 
878     PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
879     if (isdst_list == NULL) {
880         goto error;
881     }
882 
883     PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
884     if (abbr == NULL) {
885         goto error;
886     }
887 
888     PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
889     if (tz_str == NULL) {
890         goto error;
891     }
892 
893     // Load the relevant sizes
894     Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
895     if (num_transitions < 0) {
896         goto error;
897     }
898 
899     Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
900     if (num_ttinfos < 0) {
901         goto error;
902     }
903 
904     self->num_transitions = (size_t)num_transitions;
905     self->num_ttinfos = (size_t)num_ttinfos;
906 
907     // Load the transition indices and list
908     self->trans_list_utc =
909         PyMem_Malloc(self->num_transitions * sizeof(int64_t));
910     if (self->trans_list_utc == NULL) {
911         goto error;
912     }
913     trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
914     if (trans_idx == NULL) {
915         goto error;
916     }
917 
918     for (size_t i = 0; i < self->num_transitions; ++i) {
919         PyObject *num = PyTuple_GetItem(trans_utc, i);
920         if (num == NULL) {
921             goto error;
922         }
923         self->trans_list_utc[i] = PyLong_AsLongLong(num);
924         if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
925             goto error;
926         }
927 
928         num = PyTuple_GetItem(trans_idx_list, i);
929         if (num == NULL) {
930             goto error;
931         }
932 
933         Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
934         if (cur_trans_idx == -1) {
935             goto error;
936         }
937 
938         trans_idx[i] = (size_t)cur_trans_idx;
939         if (trans_idx[i] > self->num_ttinfos) {
940             PyErr_Format(
941                 PyExc_ValueError,
942                 "Invalid transition index found while reading TZif: %zd",
943                 cur_trans_idx);
944 
945             goto error;
946         }
947     }
948 
949     // Load UTC offsets and isdst (size num_ttinfos)
950     utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
951     isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
952 
953     if (utcoff == NULL || isdst == NULL) {
954         goto error;
955     }
956     for (size_t i = 0; i < self->num_ttinfos; ++i) {
957         PyObject *num = PyTuple_GetItem(utcoff_list, i);
958         if (num == NULL) {
959             goto error;
960         }
961 
962         utcoff[i] = PyLong_AsLong(num);
963         if (utcoff[i] == -1 && PyErr_Occurred()) {
964             goto error;
965         }
966 
967         num = PyTuple_GetItem(isdst_list, i);
968         if (num == NULL) {
969             goto error;
970         }
971 
972         int isdst_with_error = PyObject_IsTrue(num);
973         if (isdst_with_error == -1) {
974             goto error;
975         }
976         else {
977             isdst[i] = (unsigned char)isdst_with_error;
978         }
979     }
980 
981     dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
982     if (dstoff == NULL) {
983         goto error;
984     }
985 
986     // Derive dstoff and trans_list_wall from the information we've loaded
987     utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
988                      self->num_ttinfos);
989 
990     if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
991                     self->trans_list_wall, self->num_ttinfos,
992                     self->num_transitions)) {
993         goto error;
994     }
995 
996     // Build _ttinfo objects from utcoff, dstoff and abbr
997     self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
998     if (self->_ttinfos == NULL) {
999         goto error;
1000     }
1001     for (size_t i = 0; i < self->num_ttinfos; ++i) {
1002         PyObject *tzname = PyTuple_GetItem(abbr, i);
1003         if (tzname == NULL) {
1004             goto error;
1005         }
1006 
1007         ttinfos_allocated++;
1008         if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
1009             goto error;
1010         }
1011     }
1012 
1013     // Build our mapping from transition to the ttinfo that applies
1014     self->trans_ttinfos =
1015         PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1016     if (self->trans_ttinfos == NULL) {
1017         goto error;
1018     }
1019     for (size_t i = 0; i < self->num_transitions; ++i) {
1020         size_t ttinfo_idx = trans_idx[i];
1021         assert(ttinfo_idx < self->num_ttinfos);
1022         self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1023     }
1024 
1025     // Set ttinfo_before to the first non-DST transition
1026     for (size_t i = 0; i < self->num_ttinfos; ++i) {
1027         if (!isdst[i]) {
1028             self->ttinfo_before = &(self->_ttinfos[i]);
1029             break;
1030         }
1031     }
1032 
1033     // If there are only DST ttinfos, pick the first one, if there are no
1034     // ttinfos at all, set ttinfo_before to NULL
1035     if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1036         self->ttinfo_before = &(self->_ttinfos[0]);
1037     }
1038 
1039     if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1040         if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1041             goto error;
1042         }
1043     }
1044     else {
1045         if (!self->num_ttinfos) {
1046             PyErr_Format(PyExc_ValueError, "No time zone information found.");
1047             goto error;
1048         }
1049 
1050         size_t idx;
1051         if (!self->num_transitions) {
1052             idx = self->num_ttinfos - 1;
1053         }
1054         else {
1055             idx = trans_idx[self->num_transitions - 1];
1056         }
1057 
1058         _ttinfo *tti = &(self->_ttinfos[idx]);
1059         build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1060                      &(self->tzrule_after));
1061 
1062         // We've abused the build_tzrule constructor to construct an STD-only
1063         // rule mimicking whatever ttinfo we've picked up, but it's possible
1064         // that the one we've picked up is a DST zone, so we need to make sure
1065         // that the dstoff is set correctly in that case.
1066         if (PyObject_IsTrue(tti->dstoff)) {
1067             _ttinfo *tti_after = &(self->tzrule_after.std);
1068             Py_DECREF(tti_after->dstoff);
1069             tti_after->dstoff = tti->dstoff;
1070             Py_INCREF(tti_after->dstoff);
1071         }
1072     }
1073 
1074     // Determine if this is a "fixed offset" zone, meaning that the output of
1075     // the utcoffset, dst and tzname functions does not depend on the specific
1076     // datetime passed.
1077     //
1078     // We make three simplifying assumptions here:
1079     //
1080     // 1. If tzrule_after is not std_only, it has transitions that might occur
1081     //    (it is possible to construct TZ strings that specify STD and DST but
1082     //    no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1083     // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1084     //    represent different offsets.
1085     // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1086     //    fixed-offset zone with extra _ttinfos defined may appear to *not* be
1087     //    a fixed offset zone).
1088     //
1089     // Violations to these assumptions would be fairly exotic, and exotic
1090     // zones should almost certainly not be used with datetime.time (the
1091     // only thing that would be affected by this).
1092     if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1093         self->fixed_offset = 0;
1094     }
1095     else if (self->num_ttinfos == 0) {
1096         self->fixed_offset = 1;
1097     }
1098     else {
1099         int constant_offset =
1100             ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1101         if (constant_offset < 0) {
1102             goto error;
1103         }
1104         else {
1105             self->fixed_offset = constant_offset;
1106         }
1107     }
1108 
1109     int rv = 0;
1110     goto cleanup;
1111 error:
1112     // These resources only need to be freed if we have failed, if we succeed
1113     // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1114     // method to free the relevant resources.
1115     if (self->trans_list_utc != NULL) {
1116         PyMem_Free(self->trans_list_utc);
1117         self->trans_list_utc = NULL;
1118     }
1119 
1120     for (size_t i = 0; i < 2; ++i) {
1121         if (self->trans_list_wall[i] != NULL) {
1122             PyMem_Free(self->trans_list_wall[i]);
1123             self->trans_list_wall[i] = NULL;
1124         }
1125     }
1126 
1127     if (self->_ttinfos != NULL) {
1128         for (size_t i = 0; i < ttinfos_allocated; ++i) {
1129             xdecref_ttinfo(&(self->_ttinfos[i]));
1130         }
1131         PyMem_Free(self->_ttinfos);
1132         self->_ttinfos = NULL;
1133     }
1134 
1135     if (self->trans_ttinfos != NULL) {
1136         PyMem_Free(self->trans_ttinfos);
1137         self->trans_ttinfos = NULL;
1138     }
1139 
1140     rv = -1;
1141 cleanup:
1142     Py_XDECREF(data_tuple);
1143 
1144     if (utcoff != NULL) {
1145         PyMem_Free(utcoff);
1146     }
1147 
1148     if (dstoff != NULL) {
1149         PyMem_Free(dstoff);
1150     }
1151 
1152     if (isdst != NULL) {
1153         PyMem_Free(isdst);
1154     }
1155 
1156     if (trans_idx != NULL) {
1157         PyMem_Free(trans_idx);
1158     }
1159 
1160     return rv;
1161 }
1162 
1163 /* Function to calculate the local timestamp of a transition from the year. */
1164 int64_t
calendarrule_year_to_timestamp(TransitionRuleType * base_self,int year)1165 calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1166 {
1167     CalendarRule *self = (CalendarRule *)base_self;
1168 
1169     // We want (year, month, day of month); we have year and month, but we
1170     // need to turn (week, day-of-week) into day-of-month
1171     //
1172     // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1173     // Week 5 represents the last occurrence of day `day`, so we need to know
1174     // the first weekday of the month and the number of days in the month.
1175     int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1176     uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1177     if (self->month == 2 && is_leap_year(year)) {
1178         days_in_month += 1;
1179     }
1180 
1181     // This equation seems magical, so I'll break it down:
1182     // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1183     //    + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1184     //    because this math is mod 7
1185     // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1186     //    numbers so that -1 % 7 = 6).
1187     // 3. Add 1 because month days are a 1-based index.
1188     int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1189     if (month_day < 0) {
1190         month_day += 7;
1191     }
1192     month_day += 1;
1193 
1194     // Now use a 0-based index version of `week` to calculate the w-th
1195     // occurrence of `day`
1196     month_day += ((int8_t)(self->week) - 1) * 7;
1197 
1198     // month_day will only be > days_in_month if w was 5, and `w` means "last
1199     // occurrence of `d`", so now we just check if we over-shot the end of the
1200     // month and if so knock off 1 week.
1201     if (month_day > days_in_month) {
1202         month_day -= 7;
1203     }
1204 
1205     int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1206     return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1207             (int64_t)(self->minute * 60) + (int64_t)(self->second));
1208 }
1209 
1210 /* Constructor for CalendarRule. */
1211 int
calendarrule_new(uint8_t month,uint8_t week,uint8_t day,int8_t hour,int8_t minute,int8_t second,CalendarRule * out)1212 calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1213                  int8_t minute, int8_t second, CalendarRule *out)
1214 {
1215     // These bounds come from the POSIX standard, which describes an Mm.n.d
1216     // rule as:
1217     //
1218     //   The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1219     //   5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1220     //   may occur in either the fourth or the fifth week). Week 1 is the first
1221     //   week in which the d'th day occurs. Day zero is Sunday.
1222     if (month <= 0 || month > 12) {
1223         PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1224         return -1;
1225     }
1226 
1227     if (week <= 0 || week > 5) {
1228         PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1229         return -1;
1230     }
1231 
1232     // If the 'day' parameter type is changed to a signed type,
1233     // "day < 0" check must be added.
1234     if (/* day < 0 || */ day > 6) {
1235         PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1236         return -1;
1237     }
1238 
1239     TransitionRuleType base = {&calendarrule_year_to_timestamp};
1240 
1241     CalendarRule new_offset = {
1242         .base = base,
1243         .month = month,
1244         .week = week,
1245         .day = day,
1246         .hour = hour,
1247         .minute = minute,
1248         .second = second,
1249     };
1250 
1251     *out = new_offset;
1252     return 0;
1253 }
1254 
1255 /* Function to calculate the local timestamp of a transition from the year.
1256  *
1257  * This translates the day of the year into a local timestamp — either a
1258  * 1-based Julian day, not including leap days, or the 0-based year-day,
1259  * including leap days.
1260  * */
1261 int64_t
dayrule_year_to_timestamp(TransitionRuleType * base_self,int year)1262 dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1263 {
1264     // The function signature requires a TransitionRuleType pointer, but this
1265     // function is only applicable to DayRule* objects.
1266     DayRule *self = (DayRule *)base_self;
1267 
1268     // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1269     // to know the number of days since 1970-01-01, so we must subtract off
1270     // the equivalent of ymd_to_ord(1970, 1, 1).
1271     //
1272     // We subtract off an additional 1 day to account for January 1st (we want
1273     // the number of full days *before* the date of the transition - partial
1274     // days are accounted for in the hour, minute and second portions.
1275     int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1276 
1277     // The Julian day specification skips over February 29th in leap years,
1278     // from the POSIX standard:
1279     //
1280     //   Leap days shall not be counted. That is, in all years-including leap
1281     //   years-February 28 is day 59 and March 1 is day 60. It is impossible to
1282     //   refer explicitly to the occasional February 29.
1283     //
1284     // This is actually more useful than you'd think — if you want a rule that
1285     // always transitions on a given calendar day (other than February 29th),
1286     // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1287     // always refers to December 31st.
1288     unsigned int day = self->day;
1289     if (self->julian && day >= 59 && is_leap_year(year)) {
1290         day += 1;
1291     }
1292 
1293     return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1294            (self->minute * 60) + self->second;
1295 }
1296 
1297 /* Constructor for DayRule. */
1298 static int
dayrule_new(uint8_t julian,unsigned int day,int8_t hour,int8_t minute,int8_t second,DayRule * out)1299 dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1300             int8_t second, DayRule *out)
1301 {
1302     // The POSIX standard specifies that Julian days must be in the range (1 <=
1303     // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1304     // be in the range (0 <= n <= 365).
1305     if (day < julian || day > 365) {
1306         PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1307                      julian, day);
1308         return -1;
1309     }
1310 
1311     TransitionRuleType base = {
1312         &dayrule_year_to_timestamp,
1313     };
1314 
1315     DayRule tmp = {
1316         .base = base,
1317         .julian = julian,
1318         .day = day,
1319         .hour = hour,
1320         .minute = minute,
1321         .second = second,
1322     };
1323 
1324     *out = tmp;
1325 
1326     return 0;
1327 }
1328 
1329 /* Calculate the start and end rules for a _tzrule in the given year. */
1330 static void
tzrule_transitions(_tzrule * rule,int year,int64_t * start,int64_t * end)1331 tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1332 {
1333     assert(rule->start != NULL);
1334     assert(rule->end != NULL);
1335     *start = rule->start->year_to_timestamp(rule->start, year);
1336     *end = rule->end->year_to_timestamp(rule->end, year);
1337 }
1338 
1339 /* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1340  *
1341  * This takes a local timestamp and fold for disambiguation purposes; the year
1342  * could technically be calculated from the timestamp, but given that the
1343  * callers of this function already have the year information accessible from
1344  * the datetime struct, it is taken as an additional parameter to reduce
1345  * unnecessary calculation.
1346  * */
1347 static _ttinfo *
find_tzrule_ttinfo(_tzrule * rule,int64_t ts,unsigned char fold,int year)1348 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1349 {
1350     if (rule->std_only) {
1351         return &(rule->std);
1352     }
1353 
1354     int64_t start, end;
1355     uint8_t isdst;
1356 
1357     tzrule_transitions(rule, year, &start, &end);
1358 
1359     // With fold = 0, the period (denominated in local time) with the smaller
1360     // offset starts at the end of the gap and ends at the end of the fold;
1361     // with fold = 1, it runs from the start of the gap to the beginning of the
1362     // fold.
1363     //
1364     // So in order to determine the DST boundaries we need to know both the
1365     // fold and whether DST is positive or negative (rare), and it turns out
1366     // that this boils down to fold XOR is_positive.
1367     if (fold == (rule->dst_diff >= 0)) {
1368         end -= rule->dst_diff;
1369     }
1370     else {
1371         start += rule->dst_diff;
1372     }
1373 
1374     if (start < end) {
1375         isdst = (ts >= start) && (ts < end);
1376     }
1377     else {
1378         isdst = (ts < end) || (ts >= start);
1379     }
1380 
1381     if (isdst) {
1382         return &(rule->dst);
1383     }
1384     else {
1385         return &(rule->std);
1386     }
1387 }
1388 
1389 /* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1390  *
1391  * This function can determine the _ttinfo that applies at a given epoch time,
1392  * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1393  * This is to be used in the .fromutc() function.
1394  *
1395  * The year is technically a redundant parameter, because it can be calculated
1396  * from the timestamp, but all callers of this function should have the year
1397  * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1398  * calculation.
1399  **/
1400 static _ttinfo *
find_tzrule_ttinfo_fromutc(_tzrule * rule,int64_t ts,int year,unsigned char * fold)1401 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1402                            unsigned char *fold)
1403 {
1404     if (rule->std_only) {
1405         *fold = 0;
1406         return &(rule->std);
1407     }
1408 
1409     int64_t start, end;
1410     uint8_t isdst;
1411     tzrule_transitions(rule, year, &start, &end);
1412     start -= rule->std.utcoff_seconds;
1413     end -= rule->dst.utcoff_seconds;
1414 
1415     if (start < end) {
1416         isdst = (ts >= start) && (ts < end);
1417     }
1418     else {
1419         isdst = (ts < end) || (ts >= start);
1420     }
1421 
1422     // For positive DST, the ambiguous period is one dst_diff after the end of
1423     // DST; for negative DST, the ambiguous period is one dst_diff before the
1424     // start of DST.
1425     int64_t ambig_start, ambig_end;
1426     if (rule->dst_diff > 0) {
1427         ambig_start = end;
1428         ambig_end = end + rule->dst_diff;
1429     }
1430     else {
1431         ambig_start = start;
1432         ambig_end = start - rule->dst_diff;
1433     }
1434 
1435     *fold = (ts >= ambig_start) && (ts < ambig_end);
1436 
1437     if (isdst) {
1438         return &(rule->dst);
1439     }
1440     else {
1441         return &(rule->std);
1442     }
1443 }
1444 
1445 /* Parse a TZ string in the format specified by the POSIX standard:
1446  *
1447  *  std offset[dst[offset],start[/time],end[/time]]
1448  *
1449  *  std and dst must be 3 or more characters long and must not contain a
1450  *  leading colon, embedded digits, commas, nor a plus or minus signs; The
1451  *  spaces between "std" and "offset" are only for display and are not actually
1452  *  present in the string.
1453  *
1454  *  The format of the offset is ``[+|-]hh[:mm[:ss]]``
1455  *
1456  * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1457  *
1458  * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1459  */
1460 static int
parse_tz_str(PyObject * tz_str_obj,_tzrule * out)1461 parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1462 {
1463     PyObject *std_abbr = NULL;
1464     PyObject *dst_abbr = NULL;
1465     TransitionRuleType *start = NULL;
1466     TransitionRuleType *end = NULL;
1467     // Initialize offsets to invalid value (> 24 hours)
1468     long std_offset = 1 << 20;
1469     long dst_offset = 1 << 20;
1470 
1471     const char *tz_str = PyBytes_AsString(tz_str_obj);
1472     if (tz_str == NULL) {
1473         return -1;
1474     }
1475     const char *p = tz_str;
1476 
1477     // Read the `std` abbreviation, which must be at least 3 characters long.
1478     Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
1479     if (num_chars < 1) {
1480         PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1481         goto error;
1482     }
1483 
1484     p += num_chars;
1485 
1486     // Now read the STD offset, which is required
1487     num_chars = parse_tz_delta(p, &std_offset);
1488     if (num_chars < 0) {
1489         PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1490         goto error;
1491     }
1492     p += num_chars;
1493 
1494     // If the string ends here, there is no DST, otherwise we must parse the
1495     // DST abbreviation and start and end dates and times.
1496     if (*p == '\0') {
1497         goto complete;
1498     }
1499 
1500     num_chars = parse_abbr(p, &dst_abbr);
1501     if (num_chars < 1) {
1502         PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1503         goto error;
1504     }
1505     p += num_chars;
1506 
1507     if (*p == ',') {
1508         // From the POSIX standard:
1509         //
1510         // If no offset follows dst, the alternative time is assumed to be one
1511         // hour ahead of standard time.
1512         dst_offset = std_offset + 3600;
1513     }
1514     else {
1515         num_chars = parse_tz_delta(p, &dst_offset);
1516         if (num_chars < 0) {
1517             PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1518                          tz_str_obj);
1519             goto error;
1520         }
1521 
1522         p += num_chars;
1523     }
1524 
1525     TransitionRuleType **transitions[2] = {&start, &end};
1526     for (size_t i = 0; i < 2; ++i) {
1527         if (*p != ',') {
1528             PyErr_Format(PyExc_ValueError,
1529                          "Missing transition rules in TZ string: %R",
1530                          tz_str_obj);
1531             goto error;
1532         }
1533         p++;
1534 
1535         num_chars = parse_transition_rule(p, transitions[i]);
1536         if (num_chars < 0) {
1537             PyErr_Format(PyExc_ValueError,
1538                          "Malformed transition rule in TZ string: %R",
1539                          tz_str_obj);
1540             goto error;
1541         }
1542         p += num_chars;
1543     }
1544 
1545     if (*p != '\0') {
1546         PyErr_Format(PyExc_ValueError,
1547                      "Extraneous characters at end of TZ string: %R",
1548                      tz_str_obj);
1549         goto error;
1550     }
1551 
1552 complete:
1553     build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1554     Py_DECREF(std_abbr);
1555     Py_XDECREF(dst_abbr);
1556 
1557     return 0;
1558 error:
1559     Py_XDECREF(std_abbr);
1560     if (dst_abbr != NULL && dst_abbr != Py_None) {
1561         Py_DECREF(dst_abbr);
1562     }
1563 
1564     if (start != NULL) {
1565         PyMem_Free(start);
1566     }
1567 
1568     if (end != NULL) {
1569         PyMem_Free(end);
1570     }
1571 
1572     return -1;
1573 }
1574 
1575 static int
parse_uint(const char * const p,uint8_t * value)1576 parse_uint(const char *const p, uint8_t *value)
1577 {
1578     if (!isdigit(*p)) {
1579         return -1;
1580     }
1581 
1582     *value = (*p) - '0';
1583     return 0;
1584 }
1585 
1586 /* Parse the STD and DST abbreviations from a TZ string. */
1587 static Py_ssize_t
parse_abbr(const char * const p,PyObject ** abbr)1588 parse_abbr(const char *const p, PyObject **abbr)
1589 {
1590     const char *ptr = p;
1591     char buff = *ptr;
1592     const char *str_start;
1593     const char *str_end;
1594 
1595     if (*ptr == '<') {
1596         ptr++;
1597         str_start = ptr;
1598         while ((buff = *ptr) != '>') {
1599             // From the POSIX standard:
1600             //
1601             //   In the quoted form, the first character shall be the less-than
1602             //   ( '<' ) character and the last character shall be the
1603             //   greater-than ( '>' ) character. All characters between these
1604             //   quoting characters shall be alphanumeric characters from the
1605             //   portable character set in the current locale, the plus-sign (
1606             //   '+' ) character, or the minus-sign ( '-' ) character. The std
1607             //   and dst fields in this case shall not include the quoting
1608             //   characters.
1609             if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1610                 buff != '-') {
1611                 return -1;
1612             }
1613             ptr++;
1614         }
1615         str_end = ptr;
1616         ptr++;
1617     }
1618     else {
1619         str_start = p;
1620         // From the POSIX standard:
1621         //
1622         //   In the unquoted form, all characters in these fields shall be
1623         //   alphabetic characters from the portable character set in the
1624         //   current locale.
1625         while (isalpha(*ptr)) {
1626             ptr++;
1627         }
1628         str_end = ptr;
1629     }
1630 
1631     *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
1632     if (*abbr == NULL) {
1633         return -1;
1634     }
1635 
1636     return ptr - p;
1637 }
1638 
1639 /* Parse a UTC offset from a TZ str. */
1640 static Py_ssize_t
parse_tz_delta(const char * const p,long * total_seconds)1641 parse_tz_delta(const char *const p, long *total_seconds)
1642 {
1643     // From the POSIX spec:
1644     //
1645     //   Indicates the value added to the local time to arrive at Coordinated
1646     //   Universal Time. The offset has the form:
1647     //
1648     //   hh[:mm[:ss]]
1649     //
1650     //   One or more digits may be used; the value is always interpreted as a
1651     //   decimal number.
1652     //
1653     // The POSIX spec says that the values for `hour` must be between 0 and 24
1654     // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1655     // transition times may be signed and range from -167 to 167.
1656     long sign = -1;
1657     long hours = 0;
1658     long minutes = 0;
1659     long seconds = 0;
1660 
1661     const char *ptr = p;
1662     char buff = *ptr;
1663     if (buff == '-' || buff == '+') {
1664         // Negative numbers correspond to *positive* offsets, from the spec:
1665         //
1666         //   If preceded by a '-', the timezone shall be east of the Prime
1667         //   Meridian; otherwise, it shall be west (which may be indicated by
1668         //   an optional preceding '+' ).
1669         if (buff == '-') {
1670             sign = 1;
1671         }
1672 
1673         ptr++;
1674     }
1675 
1676     // The hour can be 1 or 2 numeric characters
1677     for (size_t i = 0; i < 2; ++i) {
1678         buff = *ptr;
1679         if (!isdigit(buff)) {
1680             if (i == 0) {
1681                 return -1;
1682             }
1683             else {
1684                 break;
1685             }
1686         }
1687 
1688         hours *= 10;
1689         hours += buff - '0';
1690         ptr++;
1691     }
1692 
1693     if (hours > 24 || hours < 0) {
1694         return -1;
1695     }
1696 
1697     // Minutes and seconds always of the format ":dd"
1698     long *outputs[2] = {&minutes, &seconds};
1699     for (size_t i = 0; i < 2; ++i) {
1700         if (*ptr != ':') {
1701             goto complete;
1702         }
1703         ptr++;
1704 
1705         for (size_t j = 0; j < 2; ++j) {
1706             buff = *ptr;
1707             if (!isdigit(buff)) {
1708                 return -1;
1709             }
1710             *(outputs[i]) *= 10;
1711             *(outputs[i]) += buff - '0';
1712             ptr++;
1713         }
1714     }
1715 
1716 complete:
1717     *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1718 
1719     return ptr - p;
1720 }
1721 
1722 /* Parse the date portion of a transition rule. */
1723 static Py_ssize_t
parse_transition_rule(const char * const p,TransitionRuleType ** out)1724 parse_transition_rule(const char *const p, TransitionRuleType **out)
1725 {
1726     // The full transition rule indicates when to change back and forth between
1727     // STD and DST, and has the form:
1728     //
1729     //   date[/time],date[/time]
1730     //
1731     // This function parses an individual date[/time] section, and returns
1732     // the number of characters that contributed to the transition rule. This
1733     // does not include the ',' at the end of the first rule.
1734     //
1735     // The POSIX spec states that if *time* is not given, the default is 02:00.
1736     const char *ptr = p;
1737     int8_t hour = 2;
1738     int8_t minute = 0;
1739     int8_t second = 0;
1740 
1741     // Rules come in one of three flavors:
1742     //
1743     //   1. Jn: Julian day n, with no leap days.
1744     //   2. n: Day of year (0-based, with leap days)
1745     //   3. Mm.n.d: Specifying by month, week and day-of-week.
1746 
1747     if (*ptr == 'M') {
1748         uint8_t month, week, day;
1749         ptr++;
1750         if (parse_uint(ptr, &month)) {
1751             return -1;
1752         }
1753         ptr++;
1754         if (*ptr != '.') {
1755             uint8_t tmp;
1756             if (parse_uint(ptr, &tmp)) {
1757                 return -1;
1758             }
1759 
1760             month *= 10;
1761             month += tmp;
1762             ptr++;
1763         }
1764 
1765         uint8_t *values[2] = {&week, &day};
1766         for (size_t i = 0; i < 2; ++i) {
1767             if (*ptr != '.') {
1768                 return -1;
1769             }
1770             ptr++;
1771 
1772             if (parse_uint(ptr, values[i])) {
1773                 return -1;
1774             }
1775             ptr++;
1776         }
1777 
1778         if (*ptr == '/') {
1779             ptr++;
1780             Py_ssize_t num_chars =
1781                 parse_transition_time(ptr, &hour, &minute, &second);
1782             if (num_chars < 0) {
1783                 return -1;
1784             }
1785             ptr += num_chars;
1786         }
1787 
1788         CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1789         if (rv == NULL) {
1790             return -1;
1791         }
1792 
1793         if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1794             PyMem_Free(rv);
1795             return -1;
1796         }
1797 
1798         *out = (TransitionRuleType *)rv;
1799     }
1800     else {
1801         uint8_t julian = 0;
1802         unsigned int day = 0;
1803         if (*ptr == 'J') {
1804             julian = 1;
1805             ptr++;
1806         }
1807 
1808         for (size_t i = 0; i < 3; ++i) {
1809             if (!isdigit(*ptr)) {
1810                 if (i == 0) {
1811                     return -1;
1812                 }
1813                 break;
1814             }
1815             day *= 10;
1816             day += (*ptr) - '0';
1817             ptr++;
1818         }
1819 
1820         if (*ptr == '/') {
1821             ptr++;
1822             Py_ssize_t num_chars =
1823                 parse_transition_time(ptr, &hour, &minute, &second);
1824             if (num_chars < 0) {
1825                 return -1;
1826             }
1827             ptr += num_chars;
1828         }
1829 
1830         DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1831         if (rv == NULL) {
1832             return -1;
1833         }
1834 
1835         if (dayrule_new(julian, day, hour, minute, second, rv)) {
1836             PyMem_Free(rv);
1837             return -1;
1838         }
1839         *out = (TransitionRuleType *)rv;
1840     }
1841 
1842     return ptr - p;
1843 }
1844 
1845 /* Parse the time portion of a transition rule (e.g. following an /) */
1846 static Py_ssize_t
parse_transition_time(const char * const p,int8_t * hour,int8_t * minute,int8_t * second)1847 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1848                       int8_t *second)
1849 {
1850     // From the spec:
1851     //
1852     //   The time has the same format as offset except that no leading sign
1853     //   ( '-' or '+' ) is allowed.
1854     //
1855     // The format for the offset is:
1856     //
1857     //   h[h][:mm[:ss]]
1858     //
1859     // RFC 8536 also allows transition times to be signed and to range from
1860     // -167 to +167, but the current version only supports [0, 99].
1861     //
1862     // TODO: Support the full range of transition hours.
1863     int8_t *components[3] = {hour, minute, second};
1864     const char *ptr = p;
1865     int8_t sign = 1;
1866 
1867     if (*ptr == '-' || *ptr == '+') {
1868         if (*ptr == '-') {
1869             sign = -1;
1870         }
1871         ptr++;
1872     }
1873 
1874     for (size_t i = 0; i < 3; ++i) {
1875         if (i > 0) {
1876             if (*ptr != ':') {
1877                 break;
1878             }
1879             ptr++;
1880         }
1881 
1882         uint8_t buff = 0;
1883         for (size_t j = 0; j < 2; j++) {
1884             if (!isdigit(*ptr)) {
1885                 if (i == 0 && j > 0) {
1886                     break;
1887                 }
1888                 return -1;
1889             }
1890 
1891             buff *= 10;
1892             buff += (*ptr) - '0';
1893             ptr++;
1894         }
1895 
1896         *(components[i]) = sign * buff;
1897     }
1898 
1899     return ptr - p;
1900 }
1901 
1902 /* Constructor for a _tzrule.
1903  *
1904  * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1905  * case `dst_offset` will be ignored and `start` and `end` are expected to be
1906  * NULL as well.
1907  *
1908  * Returns 0 on success.
1909  */
1910 static int
build_tzrule(PyObject * std_abbr,PyObject * dst_abbr,long std_offset,long dst_offset,TransitionRuleType * start,TransitionRuleType * end,_tzrule * out)1911 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1912              long dst_offset, TransitionRuleType *start,
1913              TransitionRuleType *end, _tzrule *out)
1914 {
1915     _tzrule rv = {{0}};
1916 
1917     rv.start = start;
1918     rv.end = end;
1919 
1920     if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1921         goto error;
1922     }
1923 
1924     if (dst_abbr != NULL) {
1925         rv.dst_diff = dst_offset - std_offset;
1926         if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1927             goto error;
1928         }
1929     }
1930     else {
1931         rv.std_only = 1;
1932     }
1933 
1934     *out = rv;
1935 
1936     return 0;
1937 error:
1938     xdecref_ttinfo(&rv.std);
1939     xdecref_ttinfo(&rv.dst);
1940     return -1;
1941 }
1942 
1943 /* Destructor for _tzrule. */
1944 static void
free_tzrule(_tzrule * tzrule)1945 free_tzrule(_tzrule *tzrule)
1946 {
1947     xdecref_ttinfo(&(tzrule->std));
1948     if (!tzrule->std_only) {
1949         xdecref_ttinfo(&(tzrule->dst));
1950     }
1951 
1952     if (tzrule->start != NULL) {
1953         PyMem_Free(tzrule->start);
1954     }
1955 
1956     if (tzrule->end != NULL) {
1957         PyMem_Free(tzrule->end);
1958     }
1959 }
1960 
1961 /* Calculate DST offsets from transitions and UTC offsets
1962  *
1963  * This is necessary because each C `ttinfo` only contains the UTC offset,
1964  * time zone abbreviation and an isdst boolean - it does not include the
1965  * amount of the DST offset, but we need the amount for the dst() function.
1966  *
1967  * Thus function uses heuristics to infer what the offset should be, so it
1968  * is not guaranteed that this will work for all zones. If we cannot assign
1969  * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1970  * bool(dt.dst()) will always match ttinfo.isdst.
1971  */
1972 static void
utcoff_to_dstoff(size_t * trans_idx,long * utcoffs,long * dstoffs,unsigned char * isdsts,size_t num_transitions,size_t num_ttinfos)1973 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1974                  unsigned char *isdsts, size_t num_transitions,
1975                  size_t num_ttinfos)
1976 {
1977     size_t dst_count = 0;
1978     size_t dst_found = 0;
1979     for (size_t i = 0; i < num_ttinfos; ++i) {
1980         dst_count++;
1981     }
1982 
1983     for (size_t i = 1; i < num_transitions; ++i) {
1984         if (dst_count == dst_found) {
1985             break;
1986         }
1987 
1988         size_t idx = trans_idx[i];
1989         size_t comp_idx = trans_idx[i - 1];
1990 
1991         // Only look at DST offsets that have nto been assigned already
1992         if (!isdsts[idx] || dstoffs[idx] != 0) {
1993             continue;
1994         }
1995 
1996         long dstoff = 0;
1997         long utcoff = utcoffs[idx];
1998 
1999         if (!isdsts[comp_idx]) {
2000             dstoff = utcoff - utcoffs[comp_idx];
2001         }
2002 
2003         if (!dstoff && idx < (num_ttinfos - 1)) {
2004             comp_idx = trans_idx[i + 1];
2005 
2006             // If the following transition is also DST and we couldn't find
2007             // the DST offset by this point, we're going to have to skip it
2008             // and hope this transition gets assigned later
2009             if (isdsts[comp_idx]) {
2010                 continue;
2011             }
2012 
2013             dstoff = utcoff - utcoffs[comp_idx];
2014         }
2015 
2016         if (dstoff) {
2017             dst_found++;
2018             dstoffs[idx] = dstoff;
2019         }
2020     }
2021 
2022     if (dst_found < dst_count) {
2023         // If there are time zones we didn't find a value for, we'll end up
2024         // with dstoff = 0 for something where isdst=1. This is obviously
2025         // wrong — one hour will be a much better guess than 0.
2026         for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2027             if (isdsts[idx] && !dstoffs[idx]) {
2028                 dstoffs[idx] = 3600;
2029             }
2030         }
2031     }
2032 }
2033 
2034 #define _swap(x, y, buffer) \
2035     buffer = x;             \
2036     x = y;                  \
2037     y = buffer;
2038 
2039 /* Calculate transitions in local time from UTC time and offsets.
2040  *
2041  * We want to know when each transition occurs, denominated in the number of
2042  * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2043  * *local time* (note: this is *not* equivalent to the output of
2044  * datetime.timestamp, which is the total number of seconds actual elapsed
2045  * since 1970-01-01T00:00:00Z in UTC).
2046  *
2047  * This is an ambiguous question because "local time" can be ambiguous — but it
2048  * is disambiguated by the `fold` parameter, so we allocate two arrays:
2049  *
2050  *  trans_local[0]: The wall-time transitions for fold=0
2051  *  trans_local[1]: The wall-time transitions for fold=1
2052  *
2053  * This returns 0 on success and a negative number of failure. The trans_local
2054  * arrays must be freed if they are not NULL.
2055  */
2056 static int
ts_to_local(size_t * trans_idx,int64_t * trans_utc,long * utcoff,int64_t * trans_local[2],size_t num_ttinfos,size_t num_transitions)2057 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2058             int64_t *trans_local[2], size_t num_ttinfos,
2059             size_t num_transitions)
2060 {
2061     if (num_transitions == 0) {
2062         return 0;
2063     }
2064 
2065     // Copy the UTC transitions into each array to be modified in place later
2066     for (size_t i = 0; i < 2; ++i) {
2067         trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2068         if (trans_local[i] == NULL) {
2069             return -1;
2070         }
2071 
2072         memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2073     }
2074 
2075     int64_t offset_0, offset_1, buff;
2076     if (num_ttinfos > 1) {
2077         offset_0 = utcoff[0];
2078         offset_1 = utcoff[trans_idx[0]];
2079 
2080         if (offset_1 > offset_0) {
2081             _swap(offset_0, offset_1, buff);
2082         }
2083     }
2084     else {
2085         offset_0 = utcoff[0];
2086         offset_1 = utcoff[0];
2087     }
2088 
2089     trans_local[0][0] += offset_0;
2090     trans_local[1][0] += offset_1;
2091 
2092     for (size_t i = 1; i < num_transitions; ++i) {
2093         offset_0 = utcoff[trans_idx[i - 1]];
2094         offset_1 = utcoff[trans_idx[i]];
2095 
2096         if (offset_1 > offset_0) {
2097             _swap(offset_1, offset_0, buff);
2098         }
2099 
2100         trans_local[0][i] += offset_0;
2101         trans_local[1][i] += offset_1;
2102     }
2103 
2104     return 0;
2105 }
2106 
2107 /* Simple bisect_right binary search implementation */
2108 static size_t
_bisect(const int64_t value,const int64_t * arr,size_t size)2109 _bisect(const int64_t value, const int64_t *arr, size_t size)
2110 {
2111     size_t lo = 0;
2112     size_t hi = size;
2113     size_t m;
2114 
2115     while (lo < hi) {
2116         m = (lo + hi) / 2;
2117         if (arr[m] > value) {
2118             hi = m;
2119         }
2120         else {
2121             lo = m + 1;
2122         }
2123     }
2124 
2125     return hi;
2126 }
2127 
2128 /* Find the ttinfo rules that apply at a given local datetime. */
2129 static _ttinfo *
find_ttinfo(PyZoneInfo_ZoneInfo * self,PyObject * dt)2130 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2131 {
2132     // datetime.time has a .tzinfo attribute that passes None as the dt
2133     // argument; it only really has meaning for fixed-offset zones.
2134     if (dt == Py_None) {
2135         if (self->fixed_offset) {
2136             return &(self->tzrule_after.std);
2137         }
2138         else {
2139             return &NO_TTINFO;
2140         }
2141     }
2142 
2143     int64_t ts;
2144     if (get_local_timestamp(dt, &ts)) {
2145         return NULL;
2146     }
2147 
2148     unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2149     assert(fold < 2);
2150     int64_t *local_transitions = self->trans_list_wall[fold];
2151     size_t num_trans = self->num_transitions;
2152 
2153     if (num_trans && ts < local_transitions[0]) {
2154         return self->ttinfo_before;
2155     }
2156     else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2157         return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2158                                   PyDateTime_GET_YEAR(dt));
2159     }
2160     else {
2161         size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2162         assert(idx < self->num_transitions);
2163         return self->trans_ttinfos[idx];
2164     }
2165 }
2166 
2167 static int
is_leap_year(int year)2168 is_leap_year(int year)
2169 {
2170     const unsigned int ayear = (unsigned int)year;
2171     return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2172 }
2173 
2174 /* Calculates ordinal datetime from year, month and day. */
2175 static int
ymd_to_ord(int y,int m,int d)2176 ymd_to_ord(int y, int m, int d)
2177 {
2178     y -= 1;
2179     int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2180     int yearday = DAYS_BEFORE_MONTH[m];
2181     if (m > 2 && is_leap_year(y + 1)) {
2182         yearday += 1;
2183     }
2184 
2185     return days_before_year + yearday + d;
2186 }
2187 
2188 /* Calculate the number of seconds since 1970-01-01 in local time.
2189  *
2190  * This gets a datetime in the same "units" as self->trans_list_wall so that we
2191  * can easily determine which transitions a datetime falls between. See the
2192  * comment above ts_to_local for more information.
2193  * */
2194 static int
get_local_timestamp(PyObject * dt,int64_t * local_ts)2195 get_local_timestamp(PyObject *dt, int64_t *local_ts)
2196 {
2197     assert(local_ts != NULL);
2198 
2199     int hour, minute, second;
2200     int ord;
2201     if (PyDateTime_CheckExact(dt)) {
2202         int y = PyDateTime_GET_YEAR(dt);
2203         int m = PyDateTime_GET_MONTH(dt);
2204         int d = PyDateTime_GET_DAY(dt);
2205         hour = PyDateTime_DATE_GET_HOUR(dt);
2206         minute = PyDateTime_DATE_GET_MINUTE(dt);
2207         second = PyDateTime_DATE_GET_SECOND(dt);
2208 
2209         ord = ymd_to_ord(y, m, d);
2210     }
2211     else {
2212         PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2213         if (num == NULL) {
2214             return -1;
2215         }
2216 
2217         ord = PyLong_AsLong(num);
2218         Py_DECREF(num);
2219         if (ord == -1 && PyErr_Occurred()) {
2220             return -1;
2221         }
2222 
2223         num = PyObject_GetAttrString(dt, "hour");
2224         if (num == NULL) {
2225             return -1;
2226         }
2227         hour = PyLong_AsLong(num);
2228         Py_DECREF(num);
2229         if (hour == -1) {
2230             return -1;
2231         }
2232 
2233         num = PyObject_GetAttrString(dt, "minute");
2234         if (num == NULL) {
2235             return -1;
2236         }
2237         minute = PyLong_AsLong(num);
2238         Py_DECREF(num);
2239         if (minute == -1) {
2240             return -1;
2241         }
2242 
2243         num = PyObject_GetAttrString(dt, "second");
2244         if (num == NULL) {
2245             return -1;
2246         }
2247         second = PyLong_AsLong(num);
2248         Py_DECREF(num);
2249         if (second == -1) {
2250             return -1;
2251         }
2252     }
2253 
2254     *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2255                 (int64_t)(hour * 3600 + minute * 60 + second);
2256 
2257     return 0;
2258 }
2259 
2260 /////
2261 // Functions for cache handling
2262 
2263 /* Constructor for StrongCacheNode */
2264 static StrongCacheNode *
strong_cache_node_new(PyObject * key,PyObject * zone)2265 strong_cache_node_new(PyObject *key, PyObject *zone)
2266 {
2267     StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2268     if (node == NULL) {
2269         return NULL;
2270     }
2271 
2272     Py_INCREF(key);
2273     Py_INCREF(zone);
2274 
2275     node->next = NULL;
2276     node->prev = NULL;
2277     node->key = key;
2278     node->zone = zone;
2279 
2280     return node;
2281 }
2282 
2283 /* Destructor for StrongCacheNode */
2284 void
strong_cache_node_free(StrongCacheNode * node)2285 strong_cache_node_free(StrongCacheNode *node)
2286 {
2287     Py_XDECREF(node->key);
2288     Py_XDECREF(node->zone);
2289 
2290     PyMem_Free(node);
2291 }
2292 
2293 /* Frees all nodes at or after a specified root in the strong cache.
2294  *
2295  * This can be used on the root node to free the entire cache or it can be used
2296  * to clear all nodes that have been expired (which, if everything is going
2297  * right, will actually only be 1 node at a time).
2298  */
2299 void
strong_cache_free(StrongCacheNode * root)2300 strong_cache_free(StrongCacheNode *root)
2301 {
2302     StrongCacheNode *node = root;
2303     StrongCacheNode *next_node;
2304     while (node != NULL) {
2305         next_node = node->next;
2306         strong_cache_node_free(node);
2307 
2308         node = next_node;
2309     }
2310 }
2311 
2312 /* Removes a node from the cache and update its neighbors.
2313  *
2314  * This is used both when ejecting a node from the cache and when moving it to
2315  * the front of the cache.
2316  */
2317 static void
remove_from_strong_cache(StrongCacheNode * node)2318 remove_from_strong_cache(StrongCacheNode *node)
2319 {
2320     if (ZONEINFO_STRONG_CACHE == node) {
2321         ZONEINFO_STRONG_CACHE = node->next;
2322     }
2323 
2324     if (node->prev != NULL) {
2325         node->prev->next = node->next;
2326     }
2327 
2328     if (node->next != NULL) {
2329         node->next->prev = node->prev;
2330     }
2331 
2332     node->next = NULL;
2333     node->prev = NULL;
2334 }
2335 
2336 /* Retrieves the node associated with a key, if it exists.
2337  *
2338  * This traverses the strong cache until it finds a matching key and returns a
2339  * pointer to the relevant node if found. Returns NULL if no node is found.
2340  *
2341  * root may be NULL, indicating an empty cache.
2342  */
2343 static StrongCacheNode *
find_in_strong_cache(const StrongCacheNode * const root,PyObject * const key)2344 find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2345 {
2346     const StrongCacheNode *node = root;
2347     while (node != NULL) {
2348         int rv = PyObject_RichCompareBool(key, node->key, Py_EQ);
2349         if (rv < 0) {
2350             return NULL;
2351         }
2352         if (rv) {
2353             return (StrongCacheNode *)node;
2354         }
2355 
2356         node = node->next;
2357     }
2358 
2359     return NULL;
2360 }
2361 
2362 /* Ejects a given key from the class's strong cache, if applicable.
2363  *
2364  * This function is used to enable the per-key functionality in clear_cache.
2365  */
2366 static int
eject_from_strong_cache(const PyTypeObject * const type,PyObject * key)2367 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2368 {
2369     if (type != &PyZoneInfo_ZoneInfoType) {
2370         return 0;
2371     }
2372 
2373     StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2374     if (node != NULL) {
2375         remove_from_strong_cache(node);
2376 
2377         strong_cache_node_free(node);
2378     }
2379     else if (PyErr_Occurred()) {
2380         return -1;
2381     }
2382     return 0;
2383 }
2384 
2385 /* Moves a node to the front of the LRU cache.
2386  *
2387  * The strong cache is an LRU cache, so whenever a given node is accessed, if
2388  * it is not at the front of the cache, it needs to be moved there.
2389  */
2390 static void
move_strong_cache_node_to_front(StrongCacheNode ** root,StrongCacheNode * node)2391 move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2392 {
2393     StrongCacheNode *root_p = *root;
2394     if (root_p == node) {
2395         return;
2396     }
2397 
2398     remove_from_strong_cache(node);
2399 
2400     node->prev = NULL;
2401     node->next = root_p;
2402 
2403     if (root_p != NULL) {
2404         root_p->prev = node;
2405     }
2406 
2407     *root = node;
2408 }
2409 
2410 /* Retrieves a ZoneInfo from the strong cache if it's present.
2411  *
2412  * This function finds the ZoneInfo by key and if found will move the node to
2413  * the front of the LRU cache and return a new reference to it. It returns NULL
2414  * if the key is not in the cache.
2415  *
2416  * The strong cache is currently only implemented for the base class, so this
2417  * always returns a cache miss for subclasses.
2418  */
2419 static PyObject *
zone_from_strong_cache(const PyTypeObject * const type,PyObject * const key)2420 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2421 {
2422     if (type != &PyZoneInfo_ZoneInfoType) {
2423         return NULL;  // Strong cache currently only implemented for base class
2424     }
2425 
2426     StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2427 
2428     if (node != NULL) {
2429         move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2430         Py_INCREF(node->zone);
2431         return node->zone;
2432     }
2433 
2434     return NULL;  // Cache miss
2435 }
2436 
2437 /* Inserts a new key into the strong LRU cache.
2438  *
2439  * This function is only to be used after a cache miss — it creates a new node
2440  * at the front of the cache and ejects any stale entries (keeping the size of
2441  * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2442  */
2443 static void
update_strong_cache(const PyTypeObject * const type,PyObject * key,PyObject * zone)2444 update_strong_cache(const PyTypeObject *const type, PyObject *key,
2445                     PyObject *zone)
2446 {
2447     if (type != &PyZoneInfo_ZoneInfoType) {
2448         return;
2449     }
2450 
2451     StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2452 
2453     move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2454 
2455     StrongCacheNode *node = new_node->next;
2456     for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2457         if (node == NULL) {
2458             return;
2459         }
2460         node = node->next;
2461     }
2462 
2463     // Everything beyond this point needs to be freed
2464     if (node != NULL) {
2465         if (node->prev != NULL) {
2466             node->prev->next = NULL;
2467         }
2468         strong_cache_free(node);
2469     }
2470 }
2471 
2472 /* Clears all entries into a type's strong cache.
2473  *
2474  * Because the strong cache is not implemented for subclasses, this is a no-op
2475  * for everything except the base class.
2476  */
2477 void
clear_strong_cache(const PyTypeObject * const type)2478 clear_strong_cache(const PyTypeObject *const type)
2479 {
2480     if (type != &PyZoneInfo_ZoneInfoType) {
2481         return;
2482     }
2483 
2484     strong_cache_free(ZONEINFO_STRONG_CACHE);
2485     ZONEINFO_STRONG_CACHE = NULL;
2486 }
2487 
2488 static PyObject *
new_weak_cache(void)2489 new_weak_cache(void)
2490 {
2491     PyObject *weakref_module = PyImport_ImportModule("weakref");
2492     if (weakref_module == NULL) {
2493         return NULL;
2494     }
2495 
2496     PyObject *weak_cache =
2497         PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2498     Py_DECREF(weakref_module);
2499     return weak_cache;
2500 }
2501 
2502 static int
initialize_caches(void)2503 initialize_caches(void)
2504 {
2505     // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
2506     if (TIMEDELTA_CACHE == NULL) {
2507         TIMEDELTA_CACHE = PyDict_New();
2508     }
2509     else {
2510         Py_INCREF(TIMEDELTA_CACHE);
2511     }
2512 
2513     if (TIMEDELTA_CACHE == NULL) {
2514         return -1;
2515     }
2516 
2517     if (ZONEINFO_WEAK_CACHE == NULL) {
2518         ZONEINFO_WEAK_CACHE = new_weak_cache();
2519     }
2520     else {
2521         Py_INCREF(ZONEINFO_WEAK_CACHE);
2522     }
2523 
2524     if (ZONEINFO_WEAK_CACHE == NULL) {
2525         return -1;
2526     }
2527 
2528     return 0;
2529 }
2530 
2531 static PyObject *
zoneinfo_init_subclass(PyTypeObject * cls,PyObject * args,PyObject ** kwargs)2532 zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2533 {
2534     PyObject *weak_cache = new_weak_cache();
2535     if (weak_cache == NULL) {
2536         return NULL;
2537     }
2538 
2539     if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache",
2540                                weak_cache) < 0) {
2541         Py_DECREF(weak_cache);
2542         return NULL;
2543     }
2544     Py_DECREF(weak_cache);
2545     Py_RETURN_NONE;
2546 }
2547 
2548 /////
2549 // Specify the ZoneInfo type
2550 static PyMethodDef zoneinfo_methods[] = {
2551     {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2552      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2553      PyDoc_STR("Clear the ZoneInfo cache.")},
2554     {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2555      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2556      PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2557     {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2558      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2559      PyDoc_STR("Create a ZoneInfo file from a file object.")},
2560     {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2561      PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2562                "the given datetime.")},
2563     {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2564      PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2565                "in a zone at the given datetime.")},
2566     {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2567      PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2568                "zone that applies in a zone at a given datetime.")},
2569     {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2570      PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2571                "datetime in local time.")},
2572     {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2573      PyDoc_STR("Function for serialization with the pickle protocol.")},
2574     {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2575      PyDoc_STR("Private method used in unpickling.")},
2576     {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
2577      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2578      PyDoc_STR("Function to initialize subclasses.")},
2579     {NULL} /* Sentinel */
2580 };
2581 
2582 static PyMemberDef zoneinfo_members[] = {
2583     {.name = "key",
2584      .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2585      .type = T_OBJECT_EX,
2586      .flags = READONLY,
2587      .doc = NULL},
2588     {NULL}, /* Sentinel */
2589 };
2590 
2591 static PyTypeObject PyZoneInfo_ZoneInfoType = {
2592     PyVarObject_HEAD_INIT(NULL, 0)  //
2593         .tp_name = "zoneinfo.ZoneInfo",
2594     .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2595     .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2596     .tp_repr = (reprfunc)zoneinfo_repr,
2597     .tp_str = (reprfunc)zoneinfo_str,
2598     .tp_getattro = PyObject_GenericGetAttr,
2599     .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2600     /* .tp_doc = zoneinfo_doc, */
2601     .tp_methods = zoneinfo_methods,
2602     .tp_members = zoneinfo_members,
2603     .tp_new = zoneinfo_new,
2604     .tp_dealloc = zoneinfo_dealloc,
2605 };
2606 
2607 /////
2608 // Specify the _zoneinfo module
2609 static PyMethodDef module_methods[] = {{NULL, NULL}};
2610 static void
module_free(void)2611 module_free(void)
2612 {
2613     Py_XDECREF(_tzpath_find_tzfile);
2614     _tzpath_find_tzfile = NULL;
2615 
2616     Py_XDECREF(_common_mod);
2617     _common_mod = NULL;
2618 
2619     Py_XDECREF(io_open);
2620     io_open = NULL;
2621 
2622     xdecref_ttinfo(&NO_TTINFO);
2623 
2624     if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2625         Py_DECREF(TIMEDELTA_CACHE);
2626     } else {
2627         Py_CLEAR(TIMEDELTA_CACHE);
2628     }
2629 
2630     if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2631         Py_DECREF(ZONEINFO_WEAK_CACHE);
2632     } else {
2633         Py_CLEAR(ZONEINFO_WEAK_CACHE);
2634     }
2635 
2636     clear_strong_cache(&PyZoneInfo_ZoneInfoType);
2637 }
2638 
2639 static int
zoneinfomodule_exec(PyObject * m)2640 zoneinfomodule_exec(PyObject *m)
2641 {
2642     PyDateTime_IMPORT;
2643     if (PyDateTimeAPI == NULL) {
2644         goto error;
2645     }
2646     PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2647     if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2648         goto error;
2649     }
2650 
2651     Py_INCREF(&PyZoneInfo_ZoneInfoType);
2652     PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType);
2653 
2654     /* Populate imports */
2655     PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2656     if (_tzpath_module == NULL) {
2657         goto error;
2658     }
2659 
2660     _tzpath_find_tzfile =
2661         PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2662     Py_DECREF(_tzpath_module);
2663     if (_tzpath_find_tzfile == NULL) {
2664         goto error;
2665     }
2666 
2667     PyObject *io_module = PyImport_ImportModule("io");
2668     if (io_module == NULL) {
2669         goto error;
2670     }
2671 
2672     io_open = PyObject_GetAttrString(io_module, "open");
2673     Py_DECREF(io_module);
2674     if (io_open == NULL) {
2675         goto error;
2676     }
2677 
2678     _common_mod = PyImport_ImportModule("zoneinfo._common");
2679     if (_common_mod == NULL) {
2680         goto error;
2681     }
2682 
2683     if (NO_TTINFO.utcoff == NULL) {
2684         NO_TTINFO.utcoff = Py_None;
2685         NO_TTINFO.dstoff = Py_None;
2686         NO_TTINFO.tzname = Py_None;
2687 
2688         for (size_t i = 0; i < 3; ++i) {
2689             Py_INCREF(Py_None);
2690         }
2691     }
2692 
2693     if (initialize_caches()) {
2694         goto error;
2695     }
2696 
2697     return 0;
2698 
2699 error:
2700     return -1;
2701 }
2702 
2703 static PyModuleDef_Slot zoneinfomodule_slots[] = {
2704     {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2705 
2706 static struct PyModuleDef zoneinfomodule = {
2707     PyModuleDef_HEAD_INIT,
2708     .m_name = "_zoneinfo",
2709     .m_doc = "C implementation of the zoneinfo module",
2710     .m_size = 0,
2711     .m_methods = module_methods,
2712     .m_slots = zoneinfomodule_slots,
2713     .m_free = (freefunc)module_free};
2714 
2715 PyMODINIT_FUNC
PyInit__zoneinfo(void)2716 PyInit__zoneinfo(void)
2717 {
2718     return PyModuleDef_Init(&zoneinfomodule);
2719 }
2720