1 #include "Python.h"
2 #include "pycore_long.h" // _PyLong_GetOne()
3 #include "structmember.h"
4
5 #include <ctype.h>
6 #include <stddef.h>
7 #include <stdint.h>
8
9 #include "datetime.h"
10
11 // Imports
12 static PyObject *io_open = NULL;
13 static PyObject *_tzpath_find_tzfile = NULL;
14 static PyObject *_common_mod = NULL;
15
16 typedef struct TransitionRuleType TransitionRuleType;
17 typedef struct StrongCacheNode StrongCacheNode;
18
19 typedef struct {
20 PyObject *utcoff;
21 PyObject *dstoff;
22 PyObject *tzname;
23 long utcoff_seconds;
24 } _ttinfo;
25
26 typedef struct {
27 _ttinfo std;
28 _ttinfo dst;
29 int dst_diff;
30 TransitionRuleType *start;
31 TransitionRuleType *end;
32 unsigned char std_only;
33 } _tzrule;
34
35 typedef struct {
36 PyDateTime_TZInfo base;
37 PyObject *key;
38 PyObject *file_repr;
39 PyObject *weakreflist;
40 size_t num_transitions;
41 size_t num_ttinfos;
42 int64_t *trans_list_utc;
43 int64_t *trans_list_wall[2];
44 _ttinfo **trans_ttinfos; // References to the ttinfo for each transition
45 _ttinfo *ttinfo_before;
46 _tzrule tzrule_after;
47 _ttinfo *_ttinfos; // Unique array of ttinfos for ease of deallocation
48 unsigned char fixed_offset;
49 unsigned char source;
50 } PyZoneInfo_ZoneInfo;
51
52 struct TransitionRuleType {
53 int64_t (*year_to_timestamp)(TransitionRuleType *, int);
54 };
55
56 typedef struct {
57 TransitionRuleType base;
58 uint8_t month;
59 uint8_t week;
60 uint8_t day;
61 int8_t hour;
62 int8_t minute;
63 int8_t second;
64 } CalendarRule;
65
66 typedef struct {
67 TransitionRuleType base;
68 uint8_t julian;
69 unsigned int day;
70 int8_t hour;
71 int8_t minute;
72 int8_t second;
73 } DayRule;
74
75 struct StrongCacheNode {
76 StrongCacheNode *next;
77 StrongCacheNode *prev;
78 PyObject *key;
79 PyObject *zone;
80 };
81
82 static PyTypeObject PyZoneInfo_ZoneInfoType;
83
84 // Globals
85 static PyObject *TIMEDELTA_CACHE = NULL;
86 static PyObject *ZONEINFO_WEAK_CACHE = NULL;
87 static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
88 static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
89
90 static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
91
92 // Constants
93 static const int EPOCHORDINAL = 719163;
94 static int DAYS_IN_MONTH[] = {
95 -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
96 };
97
98 static int DAYS_BEFORE_MONTH[] = {
99 -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
100 };
101
102 static const int SOURCE_NOCACHE = 0;
103 static const int SOURCE_CACHE = 1;
104 static const int SOURCE_FILE = 2;
105
106 // Forward declarations
107 static int
108 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
109 static void
110 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
111 unsigned char *isdsts, size_t num_transitions,
112 size_t num_ttinfos);
113 static int
114 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
115 int64_t *trans_local[2], size_t num_ttinfos,
116 size_t num_transitions);
117
118 static int
119 parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
120
121 static Py_ssize_t
122 parse_abbr(const char *const p, PyObject **abbr);
123 static Py_ssize_t
124 parse_tz_delta(const char *const p, long *total_seconds);
125 static Py_ssize_t
126 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
127 int8_t *second);
128 static Py_ssize_t
129 parse_transition_rule(const char *const p, TransitionRuleType **out);
130
131 static _ttinfo *
132 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
133 static _ttinfo *
134 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
135 unsigned char *fold);
136
137 static int
138 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
139 static void
140 xdecref_ttinfo(_ttinfo *ttinfo);
141 static int
142 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
143
144 static int
145 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
146 long dst_offset, TransitionRuleType *start,
147 TransitionRuleType *end, _tzrule *out);
148 static void
149 free_tzrule(_tzrule *tzrule);
150
151 static PyObject *
152 load_timedelta(long seconds);
153
154 static int
155 get_local_timestamp(PyObject *dt, int64_t *local_ts);
156 static _ttinfo *
157 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
158
159 static int
160 ymd_to_ord(int y, int m, int d);
161 static int
162 is_leap_year(int year);
163
164 static size_t
165 _bisect(const int64_t value, const int64_t *arr, size_t size);
166
167 static int
168 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
169 static void
170 clear_strong_cache(const PyTypeObject *const type);
171 static void
172 update_strong_cache(const PyTypeObject *const type, PyObject *key,
173 PyObject *zone);
174 static PyObject *
175 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key);
176
177 static PyObject *
zoneinfo_new_instance(PyTypeObject * type,PyObject * key)178 zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
179 {
180 PyObject *file_obj = NULL;
181 PyObject *file_path = NULL;
182
183 file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
184 if (file_path == NULL) {
185 return NULL;
186 }
187 else if (file_path == Py_None) {
188 file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
189 if (file_obj == NULL) {
190 Py_DECREF(file_path);
191 return NULL;
192 }
193 }
194
195 PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
196 if (self == NULL) {
197 goto error;
198 }
199
200 if (file_obj == NULL) {
201 file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
202 if (file_obj == NULL) {
203 goto error;
204 }
205 }
206
207 if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
208 goto error;
209 }
210
211 PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
212 Py_DECREF(file_obj);
213 file_obj = NULL;
214 if (rv == NULL) {
215 goto error;
216 }
217 Py_DECREF(rv);
218
219 ((PyZoneInfo_ZoneInfo *)self)->key = key;
220 Py_INCREF(key);
221
222 goto cleanup;
223 error:
224 Py_XDECREF(self);
225 self = NULL;
226 cleanup:
227 if (file_obj != NULL) {
228 PyObject *exc, *val, *tb;
229 PyErr_Fetch(&exc, &val, &tb);
230 PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
231 _PyErr_ChainExceptions(exc, val, tb);
232 if (tmp == NULL) {
233 Py_CLEAR(self);
234 }
235 Py_XDECREF(tmp);
236 Py_DECREF(file_obj);
237 }
238 Py_DECREF(file_path);
239 return self;
240 }
241
242 static PyObject *
get_weak_cache(PyTypeObject * type)243 get_weak_cache(PyTypeObject *type)
244 {
245 if (type == &PyZoneInfo_ZoneInfoType) {
246 return ZONEINFO_WEAK_CACHE;
247 }
248 else {
249 PyObject *cache =
250 PyObject_GetAttrString((PyObject *)type, "_weak_cache");
251 // We are assuming that the type lives at least as long as the function
252 // that calls get_weak_cache, and that it holds a reference to the
253 // cache, so we'll return a "borrowed reference".
254 Py_XDECREF(cache);
255 return cache;
256 }
257 }
258
259 static PyObject *
zoneinfo_new(PyTypeObject * type,PyObject * args,PyObject * kw)260 zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
261 {
262 PyObject *key = NULL;
263 static char *kwlist[] = {"key", NULL};
264 if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
265 return NULL;
266 }
267
268 PyObject *instance = zone_from_strong_cache(type, key);
269 if (instance != NULL || PyErr_Occurred()) {
270 return instance;
271 }
272
273 PyObject *weak_cache = get_weak_cache(type);
274 instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
275 if (instance == NULL) {
276 return NULL;
277 }
278
279 if (instance == Py_None) {
280 Py_DECREF(instance);
281 PyObject *tmp = zoneinfo_new_instance(type, key);
282 if (tmp == NULL) {
283 return NULL;
284 }
285
286 instance =
287 PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
288 Py_DECREF(tmp);
289 if (instance == NULL) {
290 return NULL;
291 }
292 ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
293 }
294
295 update_strong_cache(type, key, instance);
296 return instance;
297 }
298
299 static void
zoneinfo_dealloc(PyObject * obj_self)300 zoneinfo_dealloc(PyObject *obj_self)
301 {
302 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
303
304 if (self->weakreflist != NULL) {
305 PyObject_ClearWeakRefs(obj_self);
306 }
307
308 if (self->trans_list_utc != NULL) {
309 PyMem_Free(self->trans_list_utc);
310 }
311
312 for (size_t i = 0; i < 2; i++) {
313 if (self->trans_list_wall[i] != NULL) {
314 PyMem_Free(self->trans_list_wall[i]);
315 }
316 }
317
318 if (self->_ttinfos != NULL) {
319 for (size_t i = 0; i < self->num_ttinfos; ++i) {
320 xdecref_ttinfo(&(self->_ttinfos[i]));
321 }
322 PyMem_Free(self->_ttinfos);
323 }
324
325 if (self->trans_ttinfos != NULL) {
326 PyMem_Free(self->trans_ttinfos);
327 }
328
329 free_tzrule(&(self->tzrule_after));
330
331 Py_XDECREF(self->key);
332 Py_XDECREF(self->file_repr);
333
334 Py_TYPE(self)->tp_free((PyObject *)self);
335 }
336
337 static PyObject *
zoneinfo_from_file(PyTypeObject * type,PyObject * args,PyObject * kwargs)338 zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
339 {
340 PyObject *file_obj = NULL;
341 PyObject *file_repr = NULL;
342 PyObject *key = Py_None;
343 PyZoneInfo_ZoneInfo *self = NULL;
344
345 static char *kwlist[] = {"", "key", NULL};
346 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
347 &key)) {
348 return NULL;
349 }
350
351 PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
352 self = (PyZoneInfo_ZoneInfo *)obj_self;
353 if (self == NULL) {
354 return NULL;
355 }
356
357 file_repr = PyUnicode_FromFormat("%R", file_obj);
358 if (file_repr == NULL) {
359 goto error;
360 }
361
362 if (load_data(self, file_obj)) {
363 goto error;
364 }
365
366 self->source = SOURCE_FILE;
367 self->file_repr = file_repr;
368 self->key = key;
369 Py_INCREF(key);
370
371 return obj_self;
372 error:
373 Py_XDECREF(file_repr);
374 Py_XDECREF(self);
375 return NULL;
376 }
377
378 static PyObject *
zoneinfo_no_cache(PyTypeObject * cls,PyObject * args,PyObject * kwargs)379 zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
380 {
381 static char *kwlist[] = {"key", NULL};
382 PyObject *key = NULL;
383 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
384 return NULL;
385 }
386
387 PyObject *out = zoneinfo_new_instance(cls, key);
388 if (out != NULL) {
389 ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
390 }
391
392 return out;
393 }
394
395 static PyObject *
zoneinfo_clear_cache(PyObject * cls,PyObject * args,PyObject * kwargs)396 zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
397 {
398 PyObject *only_keys = NULL;
399 static char *kwlist[] = {"only_keys", NULL};
400
401 if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
402 &only_keys))) {
403 return NULL;
404 }
405
406 PyTypeObject *type = (PyTypeObject *)cls;
407 PyObject *weak_cache = get_weak_cache(type);
408
409 if (only_keys == NULL || only_keys == Py_None) {
410 PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
411 if (rv != NULL) {
412 Py_DECREF(rv);
413 }
414
415 clear_strong_cache(type);
416 }
417 else {
418 PyObject *item = NULL;
419 PyObject *pop = PyUnicode_FromString("pop");
420 if (pop == NULL) {
421 return NULL;
422 }
423
424 PyObject *iter = PyObject_GetIter(only_keys);
425 if (iter == NULL) {
426 Py_DECREF(pop);
427 return NULL;
428 }
429
430 while ((item = PyIter_Next(iter))) {
431 // Remove from strong cache
432 if (eject_from_strong_cache(type, item) < 0) {
433 Py_DECREF(item);
434 break;
435 }
436
437 // Remove from weak cache
438 PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
439 Py_None, NULL);
440
441 Py_DECREF(item);
442 if (tmp == NULL) {
443 break;
444 }
445 Py_DECREF(tmp);
446 }
447 Py_DECREF(iter);
448 Py_DECREF(pop);
449 }
450
451 if (PyErr_Occurred()) {
452 return NULL;
453 }
454
455 Py_RETURN_NONE;
456 }
457
458 static PyObject *
zoneinfo_utcoffset(PyObject * self,PyObject * dt)459 zoneinfo_utcoffset(PyObject *self, PyObject *dt)
460 {
461 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
462 if (tti == NULL) {
463 return NULL;
464 }
465 Py_INCREF(tti->utcoff);
466 return tti->utcoff;
467 }
468
469 static PyObject *
zoneinfo_dst(PyObject * self,PyObject * dt)470 zoneinfo_dst(PyObject *self, PyObject *dt)
471 {
472 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
473 if (tti == NULL) {
474 return NULL;
475 }
476 Py_INCREF(tti->dstoff);
477 return tti->dstoff;
478 }
479
480 static PyObject *
zoneinfo_tzname(PyObject * self,PyObject * dt)481 zoneinfo_tzname(PyObject *self, PyObject *dt)
482 {
483 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
484 if (tti == NULL) {
485 return NULL;
486 }
487 Py_INCREF(tti->tzname);
488 return tti->tzname;
489 }
490
491 #define GET_DT_TZINFO PyDateTime_DATE_GET_TZINFO
492
493 static PyObject *
zoneinfo_fromutc(PyObject * obj_self,PyObject * dt)494 zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
495 {
496 if (!PyDateTime_Check(dt)) {
497 PyErr_SetString(PyExc_TypeError,
498 "fromutc: argument must be a datetime");
499 return NULL;
500 }
501 if (GET_DT_TZINFO(dt) != obj_self) {
502 PyErr_SetString(PyExc_ValueError,
503 "fromutc: dt.tzinfo "
504 "is not self");
505 return NULL;
506 }
507
508 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
509
510 int64_t timestamp;
511 if (get_local_timestamp(dt, ×tamp)) {
512 return NULL;
513 }
514 size_t num_trans = self->num_transitions;
515
516 _ttinfo *tti = NULL;
517 unsigned char fold = 0;
518
519 if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
520 tti = self->ttinfo_before;
521 }
522 else if (num_trans == 0 ||
523 timestamp > self->trans_list_utc[num_trans - 1]) {
524 tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
525 PyDateTime_GET_YEAR(dt), &fold);
526
527 // Immediately after the last manual transition, the fold/gap is
528 // between self->trans_ttinfos[num_transitions - 1] and whatever
529 // ttinfo applies immediately after the last transition, not between
530 // the STD and DST rules in the tzrule_after, so we may need to
531 // adjust the fold value.
532 if (num_trans) {
533 _ttinfo *tti_prev = NULL;
534 if (num_trans == 1) {
535 tti_prev = self->ttinfo_before;
536 }
537 else {
538 tti_prev = self->trans_ttinfos[num_trans - 2];
539 }
540 int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
541 if (diff > 0 &&
542 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
543 fold = 1;
544 }
545 }
546 }
547 else {
548 size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
549 _ttinfo *tti_prev = NULL;
550
551 if (idx >= 2) {
552 tti_prev = self->trans_ttinfos[idx - 2];
553 tti = self->trans_ttinfos[idx - 1];
554 }
555 else {
556 tti_prev = self->ttinfo_before;
557 tti = self->trans_ttinfos[0];
558 }
559
560 // Detect fold
561 int64_t shift =
562 (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
563 if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
564 fold = 1;
565 }
566 }
567
568 PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
569 if (tmp == NULL) {
570 return NULL;
571 }
572
573 if (fold) {
574 if (PyDateTime_CheckExact(tmp)) {
575 ((PyDateTime_DateTime *)tmp)->fold = 1;
576 dt = tmp;
577 }
578 else {
579 PyObject *replace = PyObject_GetAttrString(tmp, "replace");
580 PyObject *args = PyTuple_New(0);
581 PyObject *kwargs = PyDict_New();
582
583 Py_DECREF(tmp);
584 if (args == NULL || kwargs == NULL || replace == NULL) {
585 Py_XDECREF(args);
586 Py_XDECREF(kwargs);
587 Py_XDECREF(replace);
588 return NULL;
589 }
590
591 dt = NULL;
592 if (!PyDict_SetItemString(kwargs, "fold", _PyLong_GetOne())) {
593 dt = PyObject_Call(replace, args, kwargs);
594 }
595
596 Py_DECREF(args);
597 Py_DECREF(kwargs);
598 Py_DECREF(replace);
599
600 if (dt == NULL) {
601 return NULL;
602 }
603 }
604 }
605 else {
606 dt = tmp;
607 }
608 return dt;
609 }
610
611 static PyObject *
zoneinfo_repr(PyZoneInfo_ZoneInfo * self)612 zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
613 {
614 PyObject *rv = NULL;
615 const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
616 if (!(self->key == Py_None)) {
617 rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
618 }
619 else {
620 assert(PyUnicode_Check(self->file_repr));
621 rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
622 self->file_repr);
623 }
624
625 return rv;
626 }
627
628 static PyObject *
zoneinfo_str(PyZoneInfo_ZoneInfo * self)629 zoneinfo_str(PyZoneInfo_ZoneInfo *self)
630 {
631 if (!(self->key == Py_None)) {
632 Py_INCREF(self->key);
633 return self->key;
634 }
635 else {
636 return zoneinfo_repr(self);
637 }
638 }
639
640 /* Pickles the ZoneInfo object by key and source.
641 *
642 * ZoneInfo objects are pickled by reference to the TZif file that they came
643 * from, which means that the exact transitions may be different or the file
644 * may not un-pickle if the data has changed on disk in the interim.
645 *
646 * It is necessary to include a bit indicating whether or not the object
647 * was constructed from the cache, because from-cache objects will hit the
648 * unpickling process's cache, whereas no-cache objects will bypass it.
649 *
650 * Objects constructed from ZoneInfo.from_file cannot be pickled.
651 */
652 static PyObject *
zoneinfo_reduce(PyObject * obj_self,PyObject * unused)653 zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
654 {
655 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
656 if (self->source == SOURCE_FILE) {
657 // Objects constructed from files cannot be pickled.
658 PyObject *pickle = PyImport_ImportModule("pickle");
659 if (pickle == NULL) {
660 return NULL;
661 }
662
663 PyObject *pickle_error =
664 PyObject_GetAttrString(pickle, "PicklingError");
665 Py_DECREF(pickle);
666 if (pickle_error == NULL) {
667 return NULL;
668 }
669
670 PyErr_Format(pickle_error,
671 "Cannot pickle a ZoneInfo file from a file stream.");
672 Py_DECREF(pickle_error);
673 return NULL;
674 }
675
676 unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
677 PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
678
679 if (constructor == NULL) {
680 return NULL;
681 }
682
683 PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
684 Py_DECREF(constructor);
685 return rv;
686 }
687
688 static PyObject *
zoneinfo__unpickle(PyTypeObject * cls,PyObject * args)689 zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
690 {
691 PyObject *key;
692 unsigned char from_cache;
693 if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
694 return NULL;
695 }
696
697 if (from_cache) {
698 PyObject *val_args = Py_BuildValue("(O)", key);
699 if (val_args == NULL) {
700 return NULL;
701 }
702
703 PyObject *rv = zoneinfo_new(cls, val_args, NULL);
704
705 Py_DECREF(val_args);
706 return rv;
707 }
708 else {
709 return zoneinfo_new_instance(cls, key);
710 }
711 }
712
713 /* It is relatively expensive to construct new timedelta objects, and in most
714 * cases we're looking at a relatively small number of timedeltas, such as
715 * integer number of hours, etc. We will keep a cache so that we construct
716 * a minimal number of these.
717 *
718 * Possibly this should be replaced with an LRU cache so that it's not possible
719 * for the memory usage to explode from this, but in order for this to be a
720 * serious problem, one would need to deliberately craft a malicious time zone
721 * file with many distinct offsets. As of tzdb 2019c, loading every single zone
722 * fills the cache with ~450 timedeltas for a total size of ~12kB.
723 *
724 * This returns a new reference to the timedelta.
725 */
726 static PyObject *
load_timedelta(long seconds)727 load_timedelta(long seconds)
728 {
729 PyObject *rv;
730 PyObject *pyoffset = PyLong_FromLong(seconds);
731 if (pyoffset == NULL) {
732 return NULL;
733 }
734 rv = PyDict_GetItemWithError(TIMEDELTA_CACHE, pyoffset);
735 if (rv == NULL) {
736 if (PyErr_Occurred()) {
737 goto error;
738 }
739 PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
740 0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
741
742 if (tmp == NULL) {
743 goto error;
744 }
745
746 rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
747 Py_DECREF(tmp);
748 }
749
750 Py_XINCREF(rv);
751 Py_DECREF(pyoffset);
752 return rv;
753 error:
754 Py_DECREF(pyoffset);
755 return NULL;
756 }
757
758 /* Constructor for _ttinfo object - this starts by initializing the _ttinfo
759 * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
760 * initialized _ttinfo objects.
761 */
762 static int
build_ttinfo(long utcoffset,long dstoffset,PyObject * tzname,_ttinfo * out)763 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
764 {
765 out->utcoff = NULL;
766 out->dstoff = NULL;
767 out->tzname = NULL;
768
769 out->utcoff_seconds = utcoffset;
770 out->utcoff = load_timedelta(utcoffset);
771 if (out->utcoff == NULL) {
772 return -1;
773 }
774
775 out->dstoff = load_timedelta(dstoffset);
776 if (out->dstoff == NULL) {
777 return -1;
778 }
779
780 out->tzname = tzname;
781 Py_INCREF(tzname);
782
783 return 0;
784 }
785
786 /* Decrease reference count on any non-NULL members of a _ttinfo */
787 static void
xdecref_ttinfo(_ttinfo * ttinfo)788 xdecref_ttinfo(_ttinfo *ttinfo)
789 {
790 if (ttinfo != NULL) {
791 Py_XDECREF(ttinfo->utcoff);
792 Py_XDECREF(ttinfo->dstoff);
793 Py_XDECREF(ttinfo->tzname);
794 }
795 }
796
797 /* Equality function for _ttinfo. */
798 static int
ttinfo_eq(const _ttinfo * const tti0,const _ttinfo * const tti1)799 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
800 {
801 int rv;
802 if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
803 1) {
804 goto end;
805 }
806
807 if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
808 1) {
809 goto end;
810 }
811
812 if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
813 1) {
814 goto end;
815 }
816 end:
817 return rv;
818 }
819
820 /* Given a file-like object, this populates a ZoneInfo object
821 *
822 * The current version calls into a Python function to read the data from
823 * file into Python objects, and this translates those Python objects into
824 * C values and calculates derived values (e.g. dstoff) in C.
825 *
826 * This returns 0 on success and -1 on failure.
827 *
828 * The function will never return while `self` is partially initialized —
829 * the object only needs to be freed / deallocated if this succeeds.
830 */
831 static int
load_data(PyZoneInfo_ZoneInfo * self,PyObject * file_obj)832 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
833 {
834 PyObject *data_tuple = NULL;
835
836 long *utcoff = NULL;
837 long *dstoff = NULL;
838 size_t *trans_idx = NULL;
839 unsigned char *isdst = NULL;
840
841 self->trans_list_utc = NULL;
842 self->trans_list_wall[0] = NULL;
843 self->trans_list_wall[1] = NULL;
844 self->trans_ttinfos = NULL;
845 self->_ttinfos = NULL;
846 self->file_repr = NULL;
847
848 size_t ttinfos_allocated = 0;
849
850 data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
851
852 if (data_tuple == NULL) {
853 goto error;
854 }
855
856 if (!PyTuple_CheckExact(data_tuple)) {
857 PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
858 data_tuple);
859 goto error;
860 }
861
862 // Unpack the data tuple
863 PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
864 if (trans_idx_list == NULL) {
865 goto error;
866 }
867
868 PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
869 if (trans_utc == NULL) {
870 goto error;
871 }
872
873 PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
874 if (utcoff_list == NULL) {
875 goto error;
876 }
877
878 PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
879 if (isdst_list == NULL) {
880 goto error;
881 }
882
883 PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
884 if (abbr == NULL) {
885 goto error;
886 }
887
888 PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
889 if (tz_str == NULL) {
890 goto error;
891 }
892
893 // Load the relevant sizes
894 Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
895 if (num_transitions < 0) {
896 goto error;
897 }
898
899 Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
900 if (num_ttinfos < 0) {
901 goto error;
902 }
903
904 self->num_transitions = (size_t)num_transitions;
905 self->num_ttinfos = (size_t)num_ttinfos;
906
907 // Load the transition indices and list
908 self->trans_list_utc =
909 PyMem_Malloc(self->num_transitions * sizeof(int64_t));
910 if (self->trans_list_utc == NULL) {
911 goto error;
912 }
913 trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
914 if (trans_idx == NULL) {
915 goto error;
916 }
917
918 for (size_t i = 0; i < self->num_transitions; ++i) {
919 PyObject *num = PyTuple_GetItem(trans_utc, i);
920 if (num == NULL) {
921 goto error;
922 }
923 self->trans_list_utc[i] = PyLong_AsLongLong(num);
924 if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
925 goto error;
926 }
927
928 num = PyTuple_GetItem(trans_idx_list, i);
929 if (num == NULL) {
930 goto error;
931 }
932
933 Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
934 if (cur_trans_idx == -1) {
935 goto error;
936 }
937
938 trans_idx[i] = (size_t)cur_trans_idx;
939 if (trans_idx[i] > self->num_ttinfos) {
940 PyErr_Format(
941 PyExc_ValueError,
942 "Invalid transition index found while reading TZif: %zd",
943 cur_trans_idx);
944
945 goto error;
946 }
947 }
948
949 // Load UTC offsets and isdst (size num_ttinfos)
950 utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
951 isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
952
953 if (utcoff == NULL || isdst == NULL) {
954 goto error;
955 }
956 for (size_t i = 0; i < self->num_ttinfos; ++i) {
957 PyObject *num = PyTuple_GetItem(utcoff_list, i);
958 if (num == NULL) {
959 goto error;
960 }
961
962 utcoff[i] = PyLong_AsLong(num);
963 if (utcoff[i] == -1 && PyErr_Occurred()) {
964 goto error;
965 }
966
967 num = PyTuple_GetItem(isdst_list, i);
968 if (num == NULL) {
969 goto error;
970 }
971
972 int isdst_with_error = PyObject_IsTrue(num);
973 if (isdst_with_error == -1) {
974 goto error;
975 }
976 else {
977 isdst[i] = (unsigned char)isdst_with_error;
978 }
979 }
980
981 dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
982 if (dstoff == NULL) {
983 goto error;
984 }
985
986 // Derive dstoff and trans_list_wall from the information we've loaded
987 utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
988 self->num_ttinfos);
989
990 if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
991 self->trans_list_wall, self->num_ttinfos,
992 self->num_transitions)) {
993 goto error;
994 }
995
996 // Build _ttinfo objects from utcoff, dstoff and abbr
997 self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
998 if (self->_ttinfos == NULL) {
999 goto error;
1000 }
1001 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1002 PyObject *tzname = PyTuple_GetItem(abbr, i);
1003 if (tzname == NULL) {
1004 goto error;
1005 }
1006
1007 ttinfos_allocated++;
1008 if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
1009 goto error;
1010 }
1011 }
1012
1013 // Build our mapping from transition to the ttinfo that applies
1014 self->trans_ttinfos =
1015 PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1016 if (self->trans_ttinfos == NULL) {
1017 goto error;
1018 }
1019 for (size_t i = 0; i < self->num_transitions; ++i) {
1020 size_t ttinfo_idx = trans_idx[i];
1021 assert(ttinfo_idx < self->num_ttinfos);
1022 self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1023 }
1024
1025 // Set ttinfo_before to the first non-DST transition
1026 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1027 if (!isdst[i]) {
1028 self->ttinfo_before = &(self->_ttinfos[i]);
1029 break;
1030 }
1031 }
1032
1033 // If there are only DST ttinfos, pick the first one, if there are no
1034 // ttinfos at all, set ttinfo_before to NULL
1035 if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1036 self->ttinfo_before = &(self->_ttinfos[0]);
1037 }
1038
1039 if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1040 if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1041 goto error;
1042 }
1043 }
1044 else {
1045 if (!self->num_ttinfos) {
1046 PyErr_Format(PyExc_ValueError, "No time zone information found.");
1047 goto error;
1048 }
1049
1050 size_t idx;
1051 if (!self->num_transitions) {
1052 idx = self->num_ttinfos - 1;
1053 }
1054 else {
1055 idx = trans_idx[self->num_transitions - 1];
1056 }
1057
1058 _ttinfo *tti = &(self->_ttinfos[idx]);
1059 build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1060 &(self->tzrule_after));
1061
1062 // We've abused the build_tzrule constructor to construct an STD-only
1063 // rule mimicking whatever ttinfo we've picked up, but it's possible
1064 // that the one we've picked up is a DST zone, so we need to make sure
1065 // that the dstoff is set correctly in that case.
1066 if (PyObject_IsTrue(tti->dstoff)) {
1067 _ttinfo *tti_after = &(self->tzrule_after.std);
1068 Py_DECREF(tti_after->dstoff);
1069 tti_after->dstoff = tti->dstoff;
1070 Py_INCREF(tti_after->dstoff);
1071 }
1072 }
1073
1074 // Determine if this is a "fixed offset" zone, meaning that the output of
1075 // the utcoffset, dst and tzname functions does not depend on the specific
1076 // datetime passed.
1077 //
1078 // We make three simplifying assumptions here:
1079 //
1080 // 1. If tzrule_after is not std_only, it has transitions that might occur
1081 // (it is possible to construct TZ strings that specify STD and DST but
1082 // no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1083 // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1084 // represent different offsets.
1085 // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1086 // fixed-offset zone with extra _ttinfos defined may appear to *not* be
1087 // a fixed offset zone).
1088 //
1089 // Violations to these assumptions would be fairly exotic, and exotic
1090 // zones should almost certainly not be used with datetime.time (the
1091 // only thing that would be affected by this).
1092 if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1093 self->fixed_offset = 0;
1094 }
1095 else if (self->num_ttinfos == 0) {
1096 self->fixed_offset = 1;
1097 }
1098 else {
1099 int constant_offset =
1100 ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1101 if (constant_offset < 0) {
1102 goto error;
1103 }
1104 else {
1105 self->fixed_offset = constant_offset;
1106 }
1107 }
1108
1109 int rv = 0;
1110 goto cleanup;
1111 error:
1112 // These resources only need to be freed if we have failed, if we succeed
1113 // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1114 // method to free the relevant resources.
1115 if (self->trans_list_utc != NULL) {
1116 PyMem_Free(self->trans_list_utc);
1117 self->trans_list_utc = NULL;
1118 }
1119
1120 for (size_t i = 0; i < 2; ++i) {
1121 if (self->trans_list_wall[i] != NULL) {
1122 PyMem_Free(self->trans_list_wall[i]);
1123 self->trans_list_wall[i] = NULL;
1124 }
1125 }
1126
1127 if (self->_ttinfos != NULL) {
1128 for (size_t i = 0; i < ttinfos_allocated; ++i) {
1129 xdecref_ttinfo(&(self->_ttinfos[i]));
1130 }
1131 PyMem_Free(self->_ttinfos);
1132 self->_ttinfos = NULL;
1133 }
1134
1135 if (self->trans_ttinfos != NULL) {
1136 PyMem_Free(self->trans_ttinfos);
1137 self->trans_ttinfos = NULL;
1138 }
1139
1140 rv = -1;
1141 cleanup:
1142 Py_XDECREF(data_tuple);
1143
1144 if (utcoff != NULL) {
1145 PyMem_Free(utcoff);
1146 }
1147
1148 if (dstoff != NULL) {
1149 PyMem_Free(dstoff);
1150 }
1151
1152 if (isdst != NULL) {
1153 PyMem_Free(isdst);
1154 }
1155
1156 if (trans_idx != NULL) {
1157 PyMem_Free(trans_idx);
1158 }
1159
1160 return rv;
1161 }
1162
1163 /* Function to calculate the local timestamp of a transition from the year. */
1164 int64_t
calendarrule_year_to_timestamp(TransitionRuleType * base_self,int year)1165 calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1166 {
1167 CalendarRule *self = (CalendarRule *)base_self;
1168
1169 // We want (year, month, day of month); we have year and month, but we
1170 // need to turn (week, day-of-week) into day-of-month
1171 //
1172 // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1173 // Week 5 represents the last occurrence of day `day`, so we need to know
1174 // the first weekday of the month and the number of days in the month.
1175 int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1176 uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1177 if (self->month == 2 && is_leap_year(year)) {
1178 days_in_month += 1;
1179 }
1180
1181 // This equation seems magical, so I'll break it down:
1182 // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1183 // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1184 // because this math is mod 7
1185 // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1186 // numbers so that -1 % 7 = 6).
1187 // 3. Add 1 because month days are a 1-based index.
1188 int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1189 if (month_day < 0) {
1190 month_day += 7;
1191 }
1192 month_day += 1;
1193
1194 // Now use a 0-based index version of `week` to calculate the w-th
1195 // occurrence of `day`
1196 month_day += ((int8_t)(self->week) - 1) * 7;
1197
1198 // month_day will only be > days_in_month if w was 5, and `w` means "last
1199 // occurrence of `d`", so now we just check if we over-shot the end of the
1200 // month and if so knock off 1 week.
1201 if (month_day > days_in_month) {
1202 month_day -= 7;
1203 }
1204
1205 int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1206 return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1207 (int64_t)(self->minute * 60) + (int64_t)(self->second));
1208 }
1209
1210 /* Constructor for CalendarRule. */
1211 int
calendarrule_new(uint8_t month,uint8_t week,uint8_t day,int8_t hour,int8_t minute,int8_t second,CalendarRule * out)1212 calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1213 int8_t minute, int8_t second, CalendarRule *out)
1214 {
1215 // These bounds come from the POSIX standard, which describes an Mm.n.d
1216 // rule as:
1217 //
1218 // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1219 // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1220 // may occur in either the fourth or the fifth week). Week 1 is the first
1221 // week in which the d'th day occurs. Day zero is Sunday.
1222 if (month <= 0 || month > 12) {
1223 PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1224 return -1;
1225 }
1226
1227 if (week <= 0 || week > 5) {
1228 PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1229 return -1;
1230 }
1231
1232 // If the 'day' parameter type is changed to a signed type,
1233 // "day < 0" check must be added.
1234 if (/* day < 0 || */ day > 6) {
1235 PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1236 return -1;
1237 }
1238
1239 TransitionRuleType base = {&calendarrule_year_to_timestamp};
1240
1241 CalendarRule new_offset = {
1242 .base = base,
1243 .month = month,
1244 .week = week,
1245 .day = day,
1246 .hour = hour,
1247 .minute = minute,
1248 .second = second,
1249 };
1250
1251 *out = new_offset;
1252 return 0;
1253 }
1254
1255 /* Function to calculate the local timestamp of a transition from the year.
1256 *
1257 * This translates the day of the year into a local timestamp — either a
1258 * 1-based Julian day, not including leap days, or the 0-based year-day,
1259 * including leap days.
1260 * */
1261 int64_t
dayrule_year_to_timestamp(TransitionRuleType * base_self,int year)1262 dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1263 {
1264 // The function signature requires a TransitionRuleType pointer, but this
1265 // function is only applicable to DayRule* objects.
1266 DayRule *self = (DayRule *)base_self;
1267
1268 // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1269 // to know the number of days since 1970-01-01, so we must subtract off
1270 // the equivalent of ymd_to_ord(1970, 1, 1).
1271 //
1272 // We subtract off an additional 1 day to account for January 1st (we want
1273 // the number of full days *before* the date of the transition - partial
1274 // days are accounted for in the hour, minute and second portions.
1275 int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1276
1277 // The Julian day specification skips over February 29th in leap years,
1278 // from the POSIX standard:
1279 //
1280 // Leap days shall not be counted. That is, in all years-including leap
1281 // years-February 28 is day 59 and March 1 is day 60. It is impossible to
1282 // refer explicitly to the occasional February 29.
1283 //
1284 // This is actually more useful than you'd think — if you want a rule that
1285 // always transitions on a given calendar day (other than February 29th),
1286 // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1287 // always refers to December 31st.
1288 unsigned int day = self->day;
1289 if (self->julian && day >= 59 && is_leap_year(year)) {
1290 day += 1;
1291 }
1292
1293 return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1294 (self->minute * 60) + self->second;
1295 }
1296
1297 /* Constructor for DayRule. */
1298 static int
dayrule_new(uint8_t julian,unsigned int day,int8_t hour,int8_t minute,int8_t second,DayRule * out)1299 dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1300 int8_t second, DayRule *out)
1301 {
1302 // The POSIX standard specifies that Julian days must be in the range (1 <=
1303 // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1304 // be in the range (0 <= n <= 365).
1305 if (day < julian || day > 365) {
1306 PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1307 julian, day);
1308 return -1;
1309 }
1310
1311 TransitionRuleType base = {
1312 &dayrule_year_to_timestamp,
1313 };
1314
1315 DayRule tmp = {
1316 .base = base,
1317 .julian = julian,
1318 .day = day,
1319 .hour = hour,
1320 .minute = minute,
1321 .second = second,
1322 };
1323
1324 *out = tmp;
1325
1326 return 0;
1327 }
1328
1329 /* Calculate the start and end rules for a _tzrule in the given year. */
1330 static void
tzrule_transitions(_tzrule * rule,int year,int64_t * start,int64_t * end)1331 tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1332 {
1333 assert(rule->start != NULL);
1334 assert(rule->end != NULL);
1335 *start = rule->start->year_to_timestamp(rule->start, year);
1336 *end = rule->end->year_to_timestamp(rule->end, year);
1337 }
1338
1339 /* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1340 *
1341 * This takes a local timestamp and fold for disambiguation purposes; the year
1342 * could technically be calculated from the timestamp, but given that the
1343 * callers of this function already have the year information accessible from
1344 * the datetime struct, it is taken as an additional parameter to reduce
1345 * unnecessary calculation.
1346 * */
1347 static _ttinfo *
find_tzrule_ttinfo(_tzrule * rule,int64_t ts,unsigned char fold,int year)1348 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1349 {
1350 if (rule->std_only) {
1351 return &(rule->std);
1352 }
1353
1354 int64_t start, end;
1355 uint8_t isdst;
1356
1357 tzrule_transitions(rule, year, &start, &end);
1358
1359 // With fold = 0, the period (denominated in local time) with the smaller
1360 // offset starts at the end of the gap and ends at the end of the fold;
1361 // with fold = 1, it runs from the start of the gap to the beginning of the
1362 // fold.
1363 //
1364 // So in order to determine the DST boundaries we need to know both the
1365 // fold and whether DST is positive or negative (rare), and it turns out
1366 // that this boils down to fold XOR is_positive.
1367 if (fold == (rule->dst_diff >= 0)) {
1368 end -= rule->dst_diff;
1369 }
1370 else {
1371 start += rule->dst_diff;
1372 }
1373
1374 if (start < end) {
1375 isdst = (ts >= start) && (ts < end);
1376 }
1377 else {
1378 isdst = (ts < end) || (ts >= start);
1379 }
1380
1381 if (isdst) {
1382 return &(rule->dst);
1383 }
1384 else {
1385 return &(rule->std);
1386 }
1387 }
1388
1389 /* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1390 *
1391 * This function can determine the _ttinfo that applies at a given epoch time,
1392 * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1393 * This is to be used in the .fromutc() function.
1394 *
1395 * The year is technically a redundant parameter, because it can be calculated
1396 * from the timestamp, but all callers of this function should have the year
1397 * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1398 * calculation.
1399 **/
1400 static _ttinfo *
find_tzrule_ttinfo_fromutc(_tzrule * rule,int64_t ts,int year,unsigned char * fold)1401 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1402 unsigned char *fold)
1403 {
1404 if (rule->std_only) {
1405 *fold = 0;
1406 return &(rule->std);
1407 }
1408
1409 int64_t start, end;
1410 uint8_t isdst;
1411 tzrule_transitions(rule, year, &start, &end);
1412 start -= rule->std.utcoff_seconds;
1413 end -= rule->dst.utcoff_seconds;
1414
1415 if (start < end) {
1416 isdst = (ts >= start) && (ts < end);
1417 }
1418 else {
1419 isdst = (ts < end) || (ts >= start);
1420 }
1421
1422 // For positive DST, the ambiguous period is one dst_diff after the end of
1423 // DST; for negative DST, the ambiguous period is one dst_diff before the
1424 // start of DST.
1425 int64_t ambig_start, ambig_end;
1426 if (rule->dst_diff > 0) {
1427 ambig_start = end;
1428 ambig_end = end + rule->dst_diff;
1429 }
1430 else {
1431 ambig_start = start;
1432 ambig_end = start - rule->dst_diff;
1433 }
1434
1435 *fold = (ts >= ambig_start) && (ts < ambig_end);
1436
1437 if (isdst) {
1438 return &(rule->dst);
1439 }
1440 else {
1441 return &(rule->std);
1442 }
1443 }
1444
1445 /* Parse a TZ string in the format specified by the POSIX standard:
1446 *
1447 * std offset[dst[offset],start[/time],end[/time]]
1448 *
1449 * std and dst must be 3 or more characters long and must not contain a
1450 * leading colon, embedded digits, commas, nor a plus or minus signs; The
1451 * spaces between "std" and "offset" are only for display and are not actually
1452 * present in the string.
1453 *
1454 * The format of the offset is ``[+|-]hh[:mm[:ss]]``
1455 *
1456 * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1457 *
1458 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1459 */
1460 static int
parse_tz_str(PyObject * tz_str_obj,_tzrule * out)1461 parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1462 {
1463 PyObject *std_abbr = NULL;
1464 PyObject *dst_abbr = NULL;
1465 TransitionRuleType *start = NULL;
1466 TransitionRuleType *end = NULL;
1467 // Initialize offsets to invalid value (> 24 hours)
1468 long std_offset = 1 << 20;
1469 long dst_offset = 1 << 20;
1470
1471 const char *tz_str = PyBytes_AsString(tz_str_obj);
1472 if (tz_str == NULL) {
1473 return -1;
1474 }
1475 const char *p = tz_str;
1476
1477 // Read the `std` abbreviation, which must be at least 3 characters long.
1478 Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
1479 if (num_chars < 1) {
1480 PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1481 goto error;
1482 }
1483
1484 p += num_chars;
1485
1486 // Now read the STD offset, which is required
1487 num_chars = parse_tz_delta(p, &std_offset);
1488 if (num_chars < 0) {
1489 PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1490 goto error;
1491 }
1492 p += num_chars;
1493
1494 // If the string ends here, there is no DST, otherwise we must parse the
1495 // DST abbreviation and start and end dates and times.
1496 if (*p == '\0') {
1497 goto complete;
1498 }
1499
1500 num_chars = parse_abbr(p, &dst_abbr);
1501 if (num_chars < 1) {
1502 PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1503 goto error;
1504 }
1505 p += num_chars;
1506
1507 if (*p == ',') {
1508 // From the POSIX standard:
1509 //
1510 // If no offset follows dst, the alternative time is assumed to be one
1511 // hour ahead of standard time.
1512 dst_offset = std_offset + 3600;
1513 }
1514 else {
1515 num_chars = parse_tz_delta(p, &dst_offset);
1516 if (num_chars < 0) {
1517 PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1518 tz_str_obj);
1519 goto error;
1520 }
1521
1522 p += num_chars;
1523 }
1524
1525 TransitionRuleType **transitions[2] = {&start, &end};
1526 for (size_t i = 0; i < 2; ++i) {
1527 if (*p != ',') {
1528 PyErr_Format(PyExc_ValueError,
1529 "Missing transition rules in TZ string: %R",
1530 tz_str_obj);
1531 goto error;
1532 }
1533 p++;
1534
1535 num_chars = parse_transition_rule(p, transitions[i]);
1536 if (num_chars < 0) {
1537 PyErr_Format(PyExc_ValueError,
1538 "Malformed transition rule in TZ string: %R",
1539 tz_str_obj);
1540 goto error;
1541 }
1542 p += num_chars;
1543 }
1544
1545 if (*p != '\0') {
1546 PyErr_Format(PyExc_ValueError,
1547 "Extraneous characters at end of TZ string: %R",
1548 tz_str_obj);
1549 goto error;
1550 }
1551
1552 complete:
1553 build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1554 Py_DECREF(std_abbr);
1555 Py_XDECREF(dst_abbr);
1556
1557 return 0;
1558 error:
1559 Py_XDECREF(std_abbr);
1560 if (dst_abbr != NULL && dst_abbr != Py_None) {
1561 Py_DECREF(dst_abbr);
1562 }
1563
1564 if (start != NULL) {
1565 PyMem_Free(start);
1566 }
1567
1568 if (end != NULL) {
1569 PyMem_Free(end);
1570 }
1571
1572 return -1;
1573 }
1574
1575 static int
parse_uint(const char * const p,uint8_t * value)1576 parse_uint(const char *const p, uint8_t *value)
1577 {
1578 if (!isdigit(*p)) {
1579 return -1;
1580 }
1581
1582 *value = (*p) - '0';
1583 return 0;
1584 }
1585
1586 /* Parse the STD and DST abbreviations from a TZ string. */
1587 static Py_ssize_t
parse_abbr(const char * const p,PyObject ** abbr)1588 parse_abbr(const char *const p, PyObject **abbr)
1589 {
1590 const char *ptr = p;
1591 char buff = *ptr;
1592 const char *str_start;
1593 const char *str_end;
1594
1595 if (*ptr == '<') {
1596 ptr++;
1597 str_start = ptr;
1598 while ((buff = *ptr) != '>') {
1599 // From the POSIX standard:
1600 //
1601 // In the quoted form, the first character shall be the less-than
1602 // ( '<' ) character and the last character shall be the
1603 // greater-than ( '>' ) character. All characters between these
1604 // quoting characters shall be alphanumeric characters from the
1605 // portable character set in the current locale, the plus-sign (
1606 // '+' ) character, or the minus-sign ( '-' ) character. The std
1607 // and dst fields in this case shall not include the quoting
1608 // characters.
1609 if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1610 buff != '-') {
1611 return -1;
1612 }
1613 ptr++;
1614 }
1615 str_end = ptr;
1616 ptr++;
1617 }
1618 else {
1619 str_start = p;
1620 // From the POSIX standard:
1621 //
1622 // In the unquoted form, all characters in these fields shall be
1623 // alphabetic characters from the portable character set in the
1624 // current locale.
1625 while (isalpha(*ptr)) {
1626 ptr++;
1627 }
1628 str_end = ptr;
1629 }
1630
1631 *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
1632 if (*abbr == NULL) {
1633 return -1;
1634 }
1635
1636 return ptr - p;
1637 }
1638
1639 /* Parse a UTC offset from a TZ str. */
1640 static Py_ssize_t
parse_tz_delta(const char * const p,long * total_seconds)1641 parse_tz_delta(const char *const p, long *total_seconds)
1642 {
1643 // From the POSIX spec:
1644 //
1645 // Indicates the value added to the local time to arrive at Coordinated
1646 // Universal Time. The offset has the form:
1647 //
1648 // hh[:mm[:ss]]
1649 //
1650 // One or more digits may be used; the value is always interpreted as a
1651 // decimal number.
1652 //
1653 // The POSIX spec says that the values for `hour` must be between 0 and 24
1654 // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1655 // transition times may be signed and range from -167 to 167.
1656 long sign = -1;
1657 long hours = 0;
1658 long minutes = 0;
1659 long seconds = 0;
1660
1661 const char *ptr = p;
1662 char buff = *ptr;
1663 if (buff == '-' || buff == '+') {
1664 // Negative numbers correspond to *positive* offsets, from the spec:
1665 //
1666 // If preceded by a '-', the timezone shall be east of the Prime
1667 // Meridian; otherwise, it shall be west (which may be indicated by
1668 // an optional preceding '+' ).
1669 if (buff == '-') {
1670 sign = 1;
1671 }
1672
1673 ptr++;
1674 }
1675
1676 // The hour can be 1 or 2 numeric characters
1677 for (size_t i = 0; i < 2; ++i) {
1678 buff = *ptr;
1679 if (!isdigit(buff)) {
1680 if (i == 0) {
1681 return -1;
1682 }
1683 else {
1684 break;
1685 }
1686 }
1687
1688 hours *= 10;
1689 hours += buff - '0';
1690 ptr++;
1691 }
1692
1693 if (hours > 24 || hours < 0) {
1694 return -1;
1695 }
1696
1697 // Minutes and seconds always of the format ":dd"
1698 long *outputs[2] = {&minutes, &seconds};
1699 for (size_t i = 0; i < 2; ++i) {
1700 if (*ptr != ':') {
1701 goto complete;
1702 }
1703 ptr++;
1704
1705 for (size_t j = 0; j < 2; ++j) {
1706 buff = *ptr;
1707 if (!isdigit(buff)) {
1708 return -1;
1709 }
1710 *(outputs[i]) *= 10;
1711 *(outputs[i]) += buff - '0';
1712 ptr++;
1713 }
1714 }
1715
1716 complete:
1717 *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1718
1719 return ptr - p;
1720 }
1721
1722 /* Parse the date portion of a transition rule. */
1723 static Py_ssize_t
parse_transition_rule(const char * const p,TransitionRuleType ** out)1724 parse_transition_rule(const char *const p, TransitionRuleType **out)
1725 {
1726 // The full transition rule indicates when to change back and forth between
1727 // STD and DST, and has the form:
1728 //
1729 // date[/time],date[/time]
1730 //
1731 // This function parses an individual date[/time] section, and returns
1732 // the number of characters that contributed to the transition rule. This
1733 // does not include the ',' at the end of the first rule.
1734 //
1735 // The POSIX spec states that if *time* is not given, the default is 02:00.
1736 const char *ptr = p;
1737 int8_t hour = 2;
1738 int8_t minute = 0;
1739 int8_t second = 0;
1740
1741 // Rules come in one of three flavors:
1742 //
1743 // 1. Jn: Julian day n, with no leap days.
1744 // 2. n: Day of year (0-based, with leap days)
1745 // 3. Mm.n.d: Specifying by month, week and day-of-week.
1746
1747 if (*ptr == 'M') {
1748 uint8_t month, week, day;
1749 ptr++;
1750 if (parse_uint(ptr, &month)) {
1751 return -1;
1752 }
1753 ptr++;
1754 if (*ptr != '.') {
1755 uint8_t tmp;
1756 if (parse_uint(ptr, &tmp)) {
1757 return -1;
1758 }
1759
1760 month *= 10;
1761 month += tmp;
1762 ptr++;
1763 }
1764
1765 uint8_t *values[2] = {&week, &day};
1766 for (size_t i = 0; i < 2; ++i) {
1767 if (*ptr != '.') {
1768 return -1;
1769 }
1770 ptr++;
1771
1772 if (parse_uint(ptr, values[i])) {
1773 return -1;
1774 }
1775 ptr++;
1776 }
1777
1778 if (*ptr == '/') {
1779 ptr++;
1780 Py_ssize_t num_chars =
1781 parse_transition_time(ptr, &hour, &minute, &second);
1782 if (num_chars < 0) {
1783 return -1;
1784 }
1785 ptr += num_chars;
1786 }
1787
1788 CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1789 if (rv == NULL) {
1790 return -1;
1791 }
1792
1793 if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1794 PyMem_Free(rv);
1795 return -1;
1796 }
1797
1798 *out = (TransitionRuleType *)rv;
1799 }
1800 else {
1801 uint8_t julian = 0;
1802 unsigned int day = 0;
1803 if (*ptr == 'J') {
1804 julian = 1;
1805 ptr++;
1806 }
1807
1808 for (size_t i = 0; i < 3; ++i) {
1809 if (!isdigit(*ptr)) {
1810 if (i == 0) {
1811 return -1;
1812 }
1813 break;
1814 }
1815 day *= 10;
1816 day += (*ptr) - '0';
1817 ptr++;
1818 }
1819
1820 if (*ptr == '/') {
1821 ptr++;
1822 Py_ssize_t num_chars =
1823 parse_transition_time(ptr, &hour, &minute, &second);
1824 if (num_chars < 0) {
1825 return -1;
1826 }
1827 ptr += num_chars;
1828 }
1829
1830 DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1831 if (rv == NULL) {
1832 return -1;
1833 }
1834
1835 if (dayrule_new(julian, day, hour, minute, second, rv)) {
1836 PyMem_Free(rv);
1837 return -1;
1838 }
1839 *out = (TransitionRuleType *)rv;
1840 }
1841
1842 return ptr - p;
1843 }
1844
1845 /* Parse the time portion of a transition rule (e.g. following an /) */
1846 static Py_ssize_t
parse_transition_time(const char * const p,int8_t * hour,int8_t * minute,int8_t * second)1847 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1848 int8_t *second)
1849 {
1850 // From the spec:
1851 //
1852 // The time has the same format as offset except that no leading sign
1853 // ( '-' or '+' ) is allowed.
1854 //
1855 // The format for the offset is:
1856 //
1857 // h[h][:mm[:ss]]
1858 //
1859 // RFC 8536 also allows transition times to be signed and to range from
1860 // -167 to +167, but the current version only supports [0, 99].
1861 //
1862 // TODO: Support the full range of transition hours.
1863 int8_t *components[3] = {hour, minute, second};
1864 const char *ptr = p;
1865 int8_t sign = 1;
1866
1867 if (*ptr == '-' || *ptr == '+') {
1868 if (*ptr == '-') {
1869 sign = -1;
1870 }
1871 ptr++;
1872 }
1873
1874 for (size_t i = 0; i < 3; ++i) {
1875 if (i > 0) {
1876 if (*ptr != ':') {
1877 break;
1878 }
1879 ptr++;
1880 }
1881
1882 uint8_t buff = 0;
1883 for (size_t j = 0; j < 2; j++) {
1884 if (!isdigit(*ptr)) {
1885 if (i == 0 && j > 0) {
1886 break;
1887 }
1888 return -1;
1889 }
1890
1891 buff *= 10;
1892 buff += (*ptr) - '0';
1893 ptr++;
1894 }
1895
1896 *(components[i]) = sign * buff;
1897 }
1898
1899 return ptr - p;
1900 }
1901
1902 /* Constructor for a _tzrule.
1903 *
1904 * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1905 * case `dst_offset` will be ignored and `start` and `end` are expected to be
1906 * NULL as well.
1907 *
1908 * Returns 0 on success.
1909 */
1910 static int
build_tzrule(PyObject * std_abbr,PyObject * dst_abbr,long std_offset,long dst_offset,TransitionRuleType * start,TransitionRuleType * end,_tzrule * out)1911 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1912 long dst_offset, TransitionRuleType *start,
1913 TransitionRuleType *end, _tzrule *out)
1914 {
1915 _tzrule rv = {{0}};
1916
1917 rv.start = start;
1918 rv.end = end;
1919
1920 if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1921 goto error;
1922 }
1923
1924 if (dst_abbr != NULL) {
1925 rv.dst_diff = dst_offset - std_offset;
1926 if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1927 goto error;
1928 }
1929 }
1930 else {
1931 rv.std_only = 1;
1932 }
1933
1934 *out = rv;
1935
1936 return 0;
1937 error:
1938 xdecref_ttinfo(&rv.std);
1939 xdecref_ttinfo(&rv.dst);
1940 return -1;
1941 }
1942
1943 /* Destructor for _tzrule. */
1944 static void
free_tzrule(_tzrule * tzrule)1945 free_tzrule(_tzrule *tzrule)
1946 {
1947 xdecref_ttinfo(&(tzrule->std));
1948 if (!tzrule->std_only) {
1949 xdecref_ttinfo(&(tzrule->dst));
1950 }
1951
1952 if (tzrule->start != NULL) {
1953 PyMem_Free(tzrule->start);
1954 }
1955
1956 if (tzrule->end != NULL) {
1957 PyMem_Free(tzrule->end);
1958 }
1959 }
1960
1961 /* Calculate DST offsets from transitions and UTC offsets
1962 *
1963 * This is necessary because each C `ttinfo` only contains the UTC offset,
1964 * time zone abbreviation and an isdst boolean - it does not include the
1965 * amount of the DST offset, but we need the amount for the dst() function.
1966 *
1967 * Thus function uses heuristics to infer what the offset should be, so it
1968 * is not guaranteed that this will work for all zones. If we cannot assign
1969 * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1970 * bool(dt.dst()) will always match ttinfo.isdst.
1971 */
1972 static void
utcoff_to_dstoff(size_t * trans_idx,long * utcoffs,long * dstoffs,unsigned char * isdsts,size_t num_transitions,size_t num_ttinfos)1973 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1974 unsigned char *isdsts, size_t num_transitions,
1975 size_t num_ttinfos)
1976 {
1977 size_t dst_count = 0;
1978 size_t dst_found = 0;
1979 for (size_t i = 0; i < num_ttinfos; ++i) {
1980 dst_count++;
1981 }
1982
1983 for (size_t i = 1; i < num_transitions; ++i) {
1984 if (dst_count == dst_found) {
1985 break;
1986 }
1987
1988 size_t idx = trans_idx[i];
1989 size_t comp_idx = trans_idx[i - 1];
1990
1991 // Only look at DST offsets that have nto been assigned already
1992 if (!isdsts[idx] || dstoffs[idx] != 0) {
1993 continue;
1994 }
1995
1996 long dstoff = 0;
1997 long utcoff = utcoffs[idx];
1998
1999 if (!isdsts[comp_idx]) {
2000 dstoff = utcoff - utcoffs[comp_idx];
2001 }
2002
2003 if (!dstoff && idx < (num_ttinfos - 1)) {
2004 comp_idx = trans_idx[i + 1];
2005
2006 // If the following transition is also DST and we couldn't find
2007 // the DST offset by this point, we're going to have to skip it
2008 // and hope this transition gets assigned later
2009 if (isdsts[comp_idx]) {
2010 continue;
2011 }
2012
2013 dstoff = utcoff - utcoffs[comp_idx];
2014 }
2015
2016 if (dstoff) {
2017 dst_found++;
2018 dstoffs[idx] = dstoff;
2019 }
2020 }
2021
2022 if (dst_found < dst_count) {
2023 // If there are time zones we didn't find a value for, we'll end up
2024 // with dstoff = 0 for something where isdst=1. This is obviously
2025 // wrong — one hour will be a much better guess than 0.
2026 for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2027 if (isdsts[idx] && !dstoffs[idx]) {
2028 dstoffs[idx] = 3600;
2029 }
2030 }
2031 }
2032 }
2033
2034 #define _swap(x, y, buffer) \
2035 buffer = x; \
2036 x = y; \
2037 y = buffer;
2038
2039 /* Calculate transitions in local time from UTC time and offsets.
2040 *
2041 * We want to know when each transition occurs, denominated in the number of
2042 * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2043 * *local time* (note: this is *not* equivalent to the output of
2044 * datetime.timestamp, which is the total number of seconds actual elapsed
2045 * since 1970-01-01T00:00:00Z in UTC).
2046 *
2047 * This is an ambiguous question because "local time" can be ambiguous — but it
2048 * is disambiguated by the `fold` parameter, so we allocate two arrays:
2049 *
2050 * trans_local[0]: The wall-time transitions for fold=0
2051 * trans_local[1]: The wall-time transitions for fold=1
2052 *
2053 * This returns 0 on success and a negative number of failure. The trans_local
2054 * arrays must be freed if they are not NULL.
2055 */
2056 static int
ts_to_local(size_t * trans_idx,int64_t * trans_utc,long * utcoff,int64_t * trans_local[2],size_t num_ttinfos,size_t num_transitions)2057 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2058 int64_t *trans_local[2], size_t num_ttinfos,
2059 size_t num_transitions)
2060 {
2061 if (num_transitions == 0) {
2062 return 0;
2063 }
2064
2065 // Copy the UTC transitions into each array to be modified in place later
2066 for (size_t i = 0; i < 2; ++i) {
2067 trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2068 if (trans_local[i] == NULL) {
2069 return -1;
2070 }
2071
2072 memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2073 }
2074
2075 int64_t offset_0, offset_1, buff;
2076 if (num_ttinfos > 1) {
2077 offset_0 = utcoff[0];
2078 offset_1 = utcoff[trans_idx[0]];
2079
2080 if (offset_1 > offset_0) {
2081 _swap(offset_0, offset_1, buff);
2082 }
2083 }
2084 else {
2085 offset_0 = utcoff[0];
2086 offset_1 = utcoff[0];
2087 }
2088
2089 trans_local[0][0] += offset_0;
2090 trans_local[1][0] += offset_1;
2091
2092 for (size_t i = 1; i < num_transitions; ++i) {
2093 offset_0 = utcoff[trans_idx[i - 1]];
2094 offset_1 = utcoff[trans_idx[i]];
2095
2096 if (offset_1 > offset_0) {
2097 _swap(offset_1, offset_0, buff);
2098 }
2099
2100 trans_local[0][i] += offset_0;
2101 trans_local[1][i] += offset_1;
2102 }
2103
2104 return 0;
2105 }
2106
2107 /* Simple bisect_right binary search implementation */
2108 static size_t
_bisect(const int64_t value,const int64_t * arr,size_t size)2109 _bisect(const int64_t value, const int64_t *arr, size_t size)
2110 {
2111 size_t lo = 0;
2112 size_t hi = size;
2113 size_t m;
2114
2115 while (lo < hi) {
2116 m = (lo + hi) / 2;
2117 if (arr[m] > value) {
2118 hi = m;
2119 }
2120 else {
2121 lo = m + 1;
2122 }
2123 }
2124
2125 return hi;
2126 }
2127
2128 /* Find the ttinfo rules that apply at a given local datetime. */
2129 static _ttinfo *
find_ttinfo(PyZoneInfo_ZoneInfo * self,PyObject * dt)2130 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2131 {
2132 // datetime.time has a .tzinfo attribute that passes None as the dt
2133 // argument; it only really has meaning for fixed-offset zones.
2134 if (dt == Py_None) {
2135 if (self->fixed_offset) {
2136 return &(self->tzrule_after.std);
2137 }
2138 else {
2139 return &NO_TTINFO;
2140 }
2141 }
2142
2143 int64_t ts;
2144 if (get_local_timestamp(dt, &ts)) {
2145 return NULL;
2146 }
2147
2148 unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2149 assert(fold < 2);
2150 int64_t *local_transitions = self->trans_list_wall[fold];
2151 size_t num_trans = self->num_transitions;
2152
2153 if (num_trans && ts < local_transitions[0]) {
2154 return self->ttinfo_before;
2155 }
2156 else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2157 return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2158 PyDateTime_GET_YEAR(dt));
2159 }
2160 else {
2161 size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2162 assert(idx < self->num_transitions);
2163 return self->trans_ttinfos[idx];
2164 }
2165 }
2166
2167 static int
is_leap_year(int year)2168 is_leap_year(int year)
2169 {
2170 const unsigned int ayear = (unsigned int)year;
2171 return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2172 }
2173
2174 /* Calculates ordinal datetime from year, month and day. */
2175 static int
ymd_to_ord(int y,int m,int d)2176 ymd_to_ord(int y, int m, int d)
2177 {
2178 y -= 1;
2179 int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2180 int yearday = DAYS_BEFORE_MONTH[m];
2181 if (m > 2 && is_leap_year(y + 1)) {
2182 yearday += 1;
2183 }
2184
2185 return days_before_year + yearday + d;
2186 }
2187
2188 /* Calculate the number of seconds since 1970-01-01 in local time.
2189 *
2190 * This gets a datetime in the same "units" as self->trans_list_wall so that we
2191 * can easily determine which transitions a datetime falls between. See the
2192 * comment above ts_to_local for more information.
2193 * */
2194 static int
get_local_timestamp(PyObject * dt,int64_t * local_ts)2195 get_local_timestamp(PyObject *dt, int64_t *local_ts)
2196 {
2197 assert(local_ts != NULL);
2198
2199 int hour, minute, second;
2200 int ord;
2201 if (PyDateTime_CheckExact(dt)) {
2202 int y = PyDateTime_GET_YEAR(dt);
2203 int m = PyDateTime_GET_MONTH(dt);
2204 int d = PyDateTime_GET_DAY(dt);
2205 hour = PyDateTime_DATE_GET_HOUR(dt);
2206 minute = PyDateTime_DATE_GET_MINUTE(dt);
2207 second = PyDateTime_DATE_GET_SECOND(dt);
2208
2209 ord = ymd_to_ord(y, m, d);
2210 }
2211 else {
2212 PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2213 if (num == NULL) {
2214 return -1;
2215 }
2216
2217 ord = PyLong_AsLong(num);
2218 Py_DECREF(num);
2219 if (ord == -1 && PyErr_Occurred()) {
2220 return -1;
2221 }
2222
2223 num = PyObject_GetAttrString(dt, "hour");
2224 if (num == NULL) {
2225 return -1;
2226 }
2227 hour = PyLong_AsLong(num);
2228 Py_DECREF(num);
2229 if (hour == -1) {
2230 return -1;
2231 }
2232
2233 num = PyObject_GetAttrString(dt, "minute");
2234 if (num == NULL) {
2235 return -1;
2236 }
2237 minute = PyLong_AsLong(num);
2238 Py_DECREF(num);
2239 if (minute == -1) {
2240 return -1;
2241 }
2242
2243 num = PyObject_GetAttrString(dt, "second");
2244 if (num == NULL) {
2245 return -1;
2246 }
2247 second = PyLong_AsLong(num);
2248 Py_DECREF(num);
2249 if (second == -1) {
2250 return -1;
2251 }
2252 }
2253
2254 *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2255 (int64_t)(hour * 3600 + minute * 60 + second);
2256
2257 return 0;
2258 }
2259
2260 /////
2261 // Functions for cache handling
2262
2263 /* Constructor for StrongCacheNode */
2264 static StrongCacheNode *
strong_cache_node_new(PyObject * key,PyObject * zone)2265 strong_cache_node_new(PyObject *key, PyObject *zone)
2266 {
2267 StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2268 if (node == NULL) {
2269 return NULL;
2270 }
2271
2272 Py_INCREF(key);
2273 Py_INCREF(zone);
2274
2275 node->next = NULL;
2276 node->prev = NULL;
2277 node->key = key;
2278 node->zone = zone;
2279
2280 return node;
2281 }
2282
2283 /* Destructor for StrongCacheNode */
2284 void
strong_cache_node_free(StrongCacheNode * node)2285 strong_cache_node_free(StrongCacheNode *node)
2286 {
2287 Py_XDECREF(node->key);
2288 Py_XDECREF(node->zone);
2289
2290 PyMem_Free(node);
2291 }
2292
2293 /* Frees all nodes at or after a specified root in the strong cache.
2294 *
2295 * This can be used on the root node to free the entire cache or it can be used
2296 * to clear all nodes that have been expired (which, if everything is going
2297 * right, will actually only be 1 node at a time).
2298 */
2299 void
strong_cache_free(StrongCacheNode * root)2300 strong_cache_free(StrongCacheNode *root)
2301 {
2302 StrongCacheNode *node = root;
2303 StrongCacheNode *next_node;
2304 while (node != NULL) {
2305 next_node = node->next;
2306 strong_cache_node_free(node);
2307
2308 node = next_node;
2309 }
2310 }
2311
2312 /* Removes a node from the cache and update its neighbors.
2313 *
2314 * This is used both when ejecting a node from the cache and when moving it to
2315 * the front of the cache.
2316 */
2317 static void
remove_from_strong_cache(StrongCacheNode * node)2318 remove_from_strong_cache(StrongCacheNode *node)
2319 {
2320 if (ZONEINFO_STRONG_CACHE == node) {
2321 ZONEINFO_STRONG_CACHE = node->next;
2322 }
2323
2324 if (node->prev != NULL) {
2325 node->prev->next = node->next;
2326 }
2327
2328 if (node->next != NULL) {
2329 node->next->prev = node->prev;
2330 }
2331
2332 node->next = NULL;
2333 node->prev = NULL;
2334 }
2335
2336 /* Retrieves the node associated with a key, if it exists.
2337 *
2338 * This traverses the strong cache until it finds a matching key and returns a
2339 * pointer to the relevant node if found. Returns NULL if no node is found.
2340 *
2341 * root may be NULL, indicating an empty cache.
2342 */
2343 static StrongCacheNode *
find_in_strong_cache(const StrongCacheNode * const root,PyObject * const key)2344 find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2345 {
2346 const StrongCacheNode *node = root;
2347 while (node != NULL) {
2348 int rv = PyObject_RichCompareBool(key, node->key, Py_EQ);
2349 if (rv < 0) {
2350 return NULL;
2351 }
2352 if (rv) {
2353 return (StrongCacheNode *)node;
2354 }
2355
2356 node = node->next;
2357 }
2358
2359 return NULL;
2360 }
2361
2362 /* Ejects a given key from the class's strong cache, if applicable.
2363 *
2364 * This function is used to enable the per-key functionality in clear_cache.
2365 */
2366 static int
eject_from_strong_cache(const PyTypeObject * const type,PyObject * key)2367 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2368 {
2369 if (type != &PyZoneInfo_ZoneInfoType) {
2370 return 0;
2371 }
2372
2373 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2374 if (node != NULL) {
2375 remove_from_strong_cache(node);
2376
2377 strong_cache_node_free(node);
2378 }
2379 else if (PyErr_Occurred()) {
2380 return -1;
2381 }
2382 return 0;
2383 }
2384
2385 /* Moves a node to the front of the LRU cache.
2386 *
2387 * The strong cache is an LRU cache, so whenever a given node is accessed, if
2388 * it is not at the front of the cache, it needs to be moved there.
2389 */
2390 static void
move_strong_cache_node_to_front(StrongCacheNode ** root,StrongCacheNode * node)2391 move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2392 {
2393 StrongCacheNode *root_p = *root;
2394 if (root_p == node) {
2395 return;
2396 }
2397
2398 remove_from_strong_cache(node);
2399
2400 node->prev = NULL;
2401 node->next = root_p;
2402
2403 if (root_p != NULL) {
2404 root_p->prev = node;
2405 }
2406
2407 *root = node;
2408 }
2409
2410 /* Retrieves a ZoneInfo from the strong cache if it's present.
2411 *
2412 * This function finds the ZoneInfo by key and if found will move the node to
2413 * the front of the LRU cache and return a new reference to it. It returns NULL
2414 * if the key is not in the cache.
2415 *
2416 * The strong cache is currently only implemented for the base class, so this
2417 * always returns a cache miss for subclasses.
2418 */
2419 static PyObject *
zone_from_strong_cache(const PyTypeObject * const type,PyObject * const key)2420 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2421 {
2422 if (type != &PyZoneInfo_ZoneInfoType) {
2423 return NULL; // Strong cache currently only implemented for base class
2424 }
2425
2426 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2427
2428 if (node != NULL) {
2429 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2430 Py_INCREF(node->zone);
2431 return node->zone;
2432 }
2433
2434 return NULL; // Cache miss
2435 }
2436
2437 /* Inserts a new key into the strong LRU cache.
2438 *
2439 * This function is only to be used after a cache miss — it creates a new node
2440 * at the front of the cache and ejects any stale entries (keeping the size of
2441 * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2442 */
2443 static void
update_strong_cache(const PyTypeObject * const type,PyObject * key,PyObject * zone)2444 update_strong_cache(const PyTypeObject *const type, PyObject *key,
2445 PyObject *zone)
2446 {
2447 if (type != &PyZoneInfo_ZoneInfoType) {
2448 return;
2449 }
2450
2451 StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2452
2453 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2454
2455 StrongCacheNode *node = new_node->next;
2456 for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2457 if (node == NULL) {
2458 return;
2459 }
2460 node = node->next;
2461 }
2462
2463 // Everything beyond this point needs to be freed
2464 if (node != NULL) {
2465 if (node->prev != NULL) {
2466 node->prev->next = NULL;
2467 }
2468 strong_cache_free(node);
2469 }
2470 }
2471
2472 /* Clears all entries into a type's strong cache.
2473 *
2474 * Because the strong cache is not implemented for subclasses, this is a no-op
2475 * for everything except the base class.
2476 */
2477 void
clear_strong_cache(const PyTypeObject * const type)2478 clear_strong_cache(const PyTypeObject *const type)
2479 {
2480 if (type != &PyZoneInfo_ZoneInfoType) {
2481 return;
2482 }
2483
2484 strong_cache_free(ZONEINFO_STRONG_CACHE);
2485 ZONEINFO_STRONG_CACHE = NULL;
2486 }
2487
2488 static PyObject *
new_weak_cache(void)2489 new_weak_cache(void)
2490 {
2491 PyObject *weakref_module = PyImport_ImportModule("weakref");
2492 if (weakref_module == NULL) {
2493 return NULL;
2494 }
2495
2496 PyObject *weak_cache =
2497 PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2498 Py_DECREF(weakref_module);
2499 return weak_cache;
2500 }
2501
2502 static int
initialize_caches(void)2503 initialize_caches(void)
2504 {
2505 // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
2506 if (TIMEDELTA_CACHE == NULL) {
2507 TIMEDELTA_CACHE = PyDict_New();
2508 }
2509 else {
2510 Py_INCREF(TIMEDELTA_CACHE);
2511 }
2512
2513 if (TIMEDELTA_CACHE == NULL) {
2514 return -1;
2515 }
2516
2517 if (ZONEINFO_WEAK_CACHE == NULL) {
2518 ZONEINFO_WEAK_CACHE = new_weak_cache();
2519 }
2520 else {
2521 Py_INCREF(ZONEINFO_WEAK_CACHE);
2522 }
2523
2524 if (ZONEINFO_WEAK_CACHE == NULL) {
2525 return -1;
2526 }
2527
2528 return 0;
2529 }
2530
2531 static PyObject *
zoneinfo_init_subclass(PyTypeObject * cls,PyObject * args,PyObject ** kwargs)2532 zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2533 {
2534 PyObject *weak_cache = new_weak_cache();
2535 if (weak_cache == NULL) {
2536 return NULL;
2537 }
2538
2539 if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache",
2540 weak_cache) < 0) {
2541 Py_DECREF(weak_cache);
2542 return NULL;
2543 }
2544 Py_DECREF(weak_cache);
2545 Py_RETURN_NONE;
2546 }
2547
2548 /////
2549 // Specify the ZoneInfo type
2550 static PyMethodDef zoneinfo_methods[] = {
2551 {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2552 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2553 PyDoc_STR("Clear the ZoneInfo cache.")},
2554 {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2555 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2556 PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2557 {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2558 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2559 PyDoc_STR("Create a ZoneInfo file from a file object.")},
2560 {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2561 PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2562 "the given datetime.")},
2563 {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2564 PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2565 "in a zone at the given datetime.")},
2566 {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2567 PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2568 "zone that applies in a zone at a given datetime.")},
2569 {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2570 PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2571 "datetime in local time.")},
2572 {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2573 PyDoc_STR("Function for serialization with the pickle protocol.")},
2574 {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2575 PyDoc_STR("Private method used in unpickling.")},
2576 {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
2577 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2578 PyDoc_STR("Function to initialize subclasses.")},
2579 {NULL} /* Sentinel */
2580 };
2581
2582 static PyMemberDef zoneinfo_members[] = {
2583 {.name = "key",
2584 .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2585 .type = T_OBJECT_EX,
2586 .flags = READONLY,
2587 .doc = NULL},
2588 {NULL}, /* Sentinel */
2589 };
2590
2591 static PyTypeObject PyZoneInfo_ZoneInfoType = {
2592 PyVarObject_HEAD_INIT(NULL, 0) //
2593 .tp_name = "zoneinfo.ZoneInfo",
2594 .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2595 .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2596 .tp_repr = (reprfunc)zoneinfo_repr,
2597 .tp_str = (reprfunc)zoneinfo_str,
2598 .tp_getattro = PyObject_GenericGetAttr,
2599 .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2600 /* .tp_doc = zoneinfo_doc, */
2601 .tp_methods = zoneinfo_methods,
2602 .tp_members = zoneinfo_members,
2603 .tp_new = zoneinfo_new,
2604 .tp_dealloc = zoneinfo_dealloc,
2605 };
2606
2607 /////
2608 // Specify the _zoneinfo module
2609 static PyMethodDef module_methods[] = {{NULL, NULL}};
2610 static void
module_free(void)2611 module_free(void)
2612 {
2613 Py_XDECREF(_tzpath_find_tzfile);
2614 _tzpath_find_tzfile = NULL;
2615
2616 Py_XDECREF(_common_mod);
2617 _common_mod = NULL;
2618
2619 Py_XDECREF(io_open);
2620 io_open = NULL;
2621
2622 xdecref_ttinfo(&NO_TTINFO);
2623
2624 if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2625 Py_DECREF(TIMEDELTA_CACHE);
2626 } else {
2627 Py_CLEAR(TIMEDELTA_CACHE);
2628 }
2629
2630 if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2631 Py_DECREF(ZONEINFO_WEAK_CACHE);
2632 } else {
2633 Py_CLEAR(ZONEINFO_WEAK_CACHE);
2634 }
2635
2636 clear_strong_cache(&PyZoneInfo_ZoneInfoType);
2637 }
2638
2639 static int
zoneinfomodule_exec(PyObject * m)2640 zoneinfomodule_exec(PyObject *m)
2641 {
2642 PyDateTime_IMPORT;
2643 if (PyDateTimeAPI == NULL) {
2644 goto error;
2645 }
2646 PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2647 if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2648 goto error;
2649 }
2650
2651 Py_INCREF(&PyZoneInfo_ZoneInfoType);
2652 PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType);
2653
2654 /* Populate imports */
2655 PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2656 if (_tzpath_module == NULL) {
2657 goto error;
2658 }
2659
2660 _tzpath_find_tzfile =
2661 PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2662 Py_DECREF(_tzpath_module);
2663 if (_tzpath_find_tzfile == NULL) {
2664 goto error;
2665 }
2666
2667 PyObject *io_module = PyImport_ImportModule("io");
2668 if (io_module == NULL) {
2669 goto error;
2670 }
2671
2672 io_open = PyObject_GetAttrString(io_module, "open");
2673 Py_DECREF(io_module);
2674 if (io_open == NULL) {
2675 goto error;
2676 }
2677
2678 _common_mod = PyImport_ImportModule("zoneinfo._common");
2679 if (_common_mod == NULL) {
2680 goto error;
2681 }
2682
2683 if (NO_TTINFO.utcoff == NULL) {
2684 NO_TTINFO.utcoff = Py_None;
2685 NO_TTINFO.dstoff = Py_None;
2686 NO_TTINFO.tzname = Py_None;
2687
2688 for (size_t i = 0; i < 3; ++i) {
2689 Py_INCREF(Py_None);
2690 }
2691 }
2692
2693 if (initialize_caches()) {
2694 goto error;
2695 }
2696
2697 return 0;
2698
2699 error:
2700 return -1;
2701 }
2702
2703 static PyModuleDef_Slot zoneinfomodule_slots[] = {
2704 {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2705
2706 static struct PyModuleDef zoneinfomodule = {
2707 PyModuleDef_HEAD_INIT,
2708 .m_name = "_zoneinfo",
2709 .m_doc = "C implementation of the zoneinfo module",
2710 .m_size = 0,
2711 .m_methods = module_methods,
2712 .m_slots = zoneinfomodule_slots,
2713 .m_free = (freefunc)module_free};
2714
2715 PyMODINIT_FUNC
PyInit__zoneinfo(void)2716 PyInit__zoneinfo(void)
2717 {
2718 return PyModuleDef_Init(&zoneinfomodule);
2719 }
2720