1 #include "Python.h"
2 #include "structmember.h"
3
4 #include <ctype.h>
5 #include <stddef.h>
6 #include <stdint.h>
7
8 #include "datetime.h"
9
10 // Imports
11 static PyObject *io_open = NULL;
12 static PyObject *_tzpath_find_tzfile = NULL;
13 static PyObject *_common_mod = NULL;
14
15 typedef struct TransitionRuleType TransitionRuleType;
16 typedef struct StrongCacheNode StrongCacheNode;
17
18 typedef struct {
19 PyObject *utcoff;
20 PyObject *dstoff;
21 PyObject *tzname;
22 long utcoff_seconds;
23 } _ttinfo;
24
25 typedef struct {
26 _ttinfo std;
27 _ttinfo dst;
28 int dst_diff;
29 TransitionRuleType *start;
30 TransitionRuleType *end;
31 unsigned char std_only;
32 } _tzrule;
33
34 typedef struct {
35 PyDateTime_TZInfo base;
36 PyObject *key;
37 PyObject *file_repr;
38 PyObject *weakreflist;
39 size_t num_transitions;
40 size_t num_ttinfos;
41 int64_t *trans_list_utc;
42 int64_t *trans_list_wall[2];
43 _ttinfo **trans_ttinfos; // References to the ttinfo for each transition
44 _ttinfo *ttinfo_before;
45 _tzrule tzrule_after;
46 _ttinfo *_ttinfos; // Unique array of ttinfos for ease of deallocation
47 unsigned char fixed_offset;
48 unsigned char source;
49 } PyZoneInfo_ZoneInfo;
50
51 struct TransitionRuleType {
52 int64_t (*year_to_timestamp)(TransitionRuleType *, int);
53 };
54
55 typedef struct {
56 TransitionRuleType base;
57 uint8_t month;
58 uint8_t week;
59 uint8_t day;
60 int8_t hour;
61 int8_t minute;
62 int8_t second;
63 } CalendarRule;
64
65 typedef struct {
66 TransitionRuleType base;
67 uint8_t julian;
68 unsigned int day;
69 int8_t hour;
70 int8_t minute;
71 int8_t second;
72 } DayRule;
73
74 struct StrongCacheNode {
75 StrongCacheNode *next;
76 StrongCacheNode *prev;
77 PyObject *key;
78 PyObject *zone;
79 };
80
81 static PyTypeObject PyZoneInfo_ZoneInfoType;
82
83 // Globals
84 static PyObject *TIMEDELTA_CACHE = NULL;
85 static PyObject *ZONEINFO_WEAK_CACHE = NULL;
86 static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
87 static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
88
89 static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
90
91 // Constants
92 static const int EPOCHORDINAL = 719163;
93 static int DAYS_IN_MONTH[] = {
94 -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
95 };
96
97 static int DAYS_BEFORE_MONTH[] = {
98 -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
99 };
100
101 static const int SOURCE_NOCACHE = 0;
102 static const int SOURCE_CACHE = 1;
103 static const int SOURCE_FILE = 2;
104
105 // Forward declarations
106 static int
107 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
108 static void
109 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
110 unsigned char *isdsts, size_t num_transitions,
111 size_t num_ttinfos);
112 static int
113 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
114 int64_t *trans_local[2], size_t num_ttinfos,
115 size_t num_transitions);
116
117 static int
118 parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
119
120 static Py_ssize_t
121 parse_abbr(const char *const p, PyObject **abbr);
122 static Py_ssize_t
123 parse_tz_delta(const char *const p, long *total_seconds);
124 static Py_ssize_t
125 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
126 int8_t *second);
127 static Py_ssize_t
128 parse_transition_rule(const char *const p, TransitionRuleType **out);
129
130 static _ttinfo *
131 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
132 static _ttinfo *
133 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
134 unsigned char *fold);
135
136 static int
137 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
138 static void
139 xdecref_ttinfo(_ttinfo *ttinfo);
140 static int
141 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
142
143 static int
144 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
145 long dst_offset, TransitionRuleType *start,
146 TransitionRuleType *end, _tzrule *out);
147 static void
148 free_tzrule(_tzrule *tzrule);
149
150 static PyObject *
151 load_timedelta(long seconds);
152
153 static int
154 get_local_timestamp(PyObject *dt, int64_t *local_ts);
155 static _ttinfo *
156 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
157
158 static int
159 ymd_to_ord(int y, int m, int d);
160 static int
161 is_leap_year(int year);
162
163 static size_t
164 _bisect(const int64_t value, const int64_t *arr, size_t size);
165
166 static int
167 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
168 static void
169 clear_strong_cache(const PyTypeObject *const type);
170 static void
171 update_strong_cache(const PyTypeObject *const type, PyObject *key,
172 PyObject *zone);
173 static PyObject *
174 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key);
175
176 static PyObject *
zoneinfo_new_instance(PyTypeObject * type,PyObject * key)177 zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
178 {
179 PyObject *file_obj = NULL;
180 PyObject *file_path = NULL;
181
182 file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
183 if (file_path == NULL) {
184 return NULL;
185 }
186 else if (file_path == Py_None) {
187 file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
188 if (file_obj == NULL) {
189 Py_DECREF(file_path);
190 return NULL;
191 }
192 }
193
194 PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
195 if (self == NULL) {
196 goto error;
197 }
198
199 if (file_obj == NULL) {
200 file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
201 if (file_obj == NULL) {
202 goto error;
203 }
204 }
205
206 if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
207 goto error;
208 }
209
210 PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
211 Py_DECREF(file_obj);
212 file_obj = NULL;
213 if (rv == NULL) {
214 goto error;
215 }
216 Py_DECREF(rv);
217
218 ((PyZoneInfo_ZoneInfo *)self)->key = key;
219 Py_INCREF(key);
220
221 goto cleanup;
222 error:
223 Py_XDECREF(self);
224 self = NULL;
225 cleanup:
226 if (file_obj != NULL) {
227 PyObject *exc, *val, *tb;
228 PyErr_Fetch(&exc, &val, &tb);
229 PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
230 _PyErr_ChainExceptions(exc, val, tb);
231 if (tmp == NULL) {
232 Py_CLEAR(self);
233 }
234 Py_XDECREF(tmp);
235 Py_DECREF(file_obj);
236 }
237 Py_DECREF(file_path);
238 return self;
239 }
240
241 static PyObject *
get_weak_cache(PyTypeObject * type)242 get_weak_cache(PyTypeObject *type)
243 {
244 if (type == &PyZoneInfo_ZoneInfoType) {
245 return ZONEINFO_WEAK_CACHE;
246 }
247 else {
248 PyObject *cache =
249 PyObject_GetAttrString((PyObject *)type, "_weak_cache");
250 // We are assuming that the type lives at least as long as the function
251 // that calls get_weak_cache, and that it holds a reference to the
252 // cache, so we'll return a "borrowed reference".
253 Py_XDECREF(cache);
254 return cache;
255 }
256 }
257
258 static PyObject *
zoneinfo_new(PyTypeObject * type,PyObject * args,PyObject * kw)259 zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
260 {
261 PyObject *key = NULL;
262 static char *kwlist[] = {"key", NULL};
263 if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
264 return NULL;
265 }
266
267 PyObject *instance = zone_from_strong_cache(type, key);
268 if (instance != NULL || PyErr_Occurred()) {
269 return instance;
270 }
271
272 PyObject *weak_cache = get_weak_cache(type);
273 instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
274 if (instance == NULL) {
275 return NULL;
276 }
277
278 if (instance == Py_None) {
279 Py_DECREF(instance);
280 PyObject *tmp = zoneinfo_new_instance(type, key);
281 if (tmp == NULL) {
282 return NULL;
283 }
284
285 instance =
286 PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
287 Py_DECREF(tmp);
288 if (instance == NULL) {
289 return NULL;
290 }
291 ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
292 }
293
294 update_strong_cache(type, key, instance);
295 return instance;
296 }
297
298 static void
zoneinfo_dealloc(PyObject * obj_self)299 zoneinfo_dealloc(PyObject *obj_self)
300 {
301 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
302
303 if (self->weakreflist != NULL) {
304 PyObject_ClearWeakRefs(obj_self);
305 }
306
307 if (self->trans_list_utc != NULL) {
308 PyMem_Free(self->trans_list_utc);
309 }
310
311 for (size_t i = 0; i < 2; i++) {
312 if (self->trans_list_wall[i] != NULL) {
313 PyMem_Free(self->trans_list_wall[i]);
314 }
315 }
316
317 if (self->_ttinfos != NULL) {
318 for (size_t i = 0; i < self->num_ttinfos; ++i) {
319 xdecref_ttinfo(&(self->_ttinfos[i]));
320 }
321 PyMem_Free(self->_ttinfos);
322 }
323
324 if (self->trans_ttinfos != NULL) {
325 PyMem_Free(self->trans_ttinfos);
326 }
327
328 free_tzrule(&(self->tzrule_after));
329
330 Py_XDECREF(self->key);
331 Py_XDECREF(self->file_repr);
332
333 Py_TYPE(self)->tp_free((PyObject *)self);
334 }
335
336 static PyObject *
zoneinfo_from_file(PyTypeObject * type,PyObject * args,PyObject * kwargs)337 zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
338 {
339 PyObject *file_obj = NULL;
340 PyObject *file_repr = NULL;
341 PyObject *key = Py_None;
342 PyZoneInfo_ZoneInfo *self = NULL;
343
344 static char *kwlist[] = {"", "key", NULL};
345 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
346 &key)) {
347 return NULL;
348 }
349
350 PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
351 self = (PyZoneInfo_ZoneInfo *)obj_self;
352 if (self == NULL) {
353 return NULL;
354 }
355
356 file_repr = PyUnicode_FromFormat("%R", file_obj);
357 if (file_repr == NULL) {
358 goto error;
359 }
360
361 if (load_data(self, file_obj)) {
362 goto error;
363 }
364
365 self->source = SOURCE_FILE;
366 self->file_repr = file_repr;
367 self->key = key;
368 Py_INCREF(key);
369
370 return obj_self;
371 error:
372 Py_XDECREF(file_repr);
373 Py_XDECREF(self);
374 return NULL;
375 }
376
377 static PyObject *
zoneinfo_no_cache(PyTypeObject * cls,PyObject * args,PyObject * kwargs)378 zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
379 {
380 static char *kwlist[] = {"key", NULL};
381 PyObject *key = NULL;
382 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
383 return NULL;
384 }
385
386 PyObject *out = zoneinfo_new_instance(cls, key);
387 if (out != NULL) {
388 ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
389 }
390
391 return out;
392 }
393
394 static PyObject *
zoneinfo_clear_cache(PyObject * cls,PyObject * args,PyObject * kwargs)395 zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
396 {
397 PyObject *only_keys = NULL;
398 static char *kwlist[] = {"only_keys", NULL};
399
400 if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
401 &only_keys))) {
402 return NULL;
403 }
404
405 PyTypeObject *type = (PyTypeObject *)cls;
406 PyObject *weak_cache = get_weak_cache(type);
407
408 if (only_keys == NULL || only_keys == Py_None) {
409 PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
410 if (rv != NULL) {
411 Py_DECREF(rv);
412 }
413
414 clear_strong_cache(type);
415 }
416 else {
417 PyObject *item = NULL;
418 PyObject *pop = PyUnicode_FromString("pop");
419 if (pop == NULL) {
420 return NULL;
421 }
422
423 PyObject *iter = PyObject_GetIter(only_keys);
424 if (iter == NULL) {
425 Py_DECREF(pop);
426 return NULL;
427 }
428
429 while ((item = PyIter_Next(iter))) {
430 // Remove from strong cache
431 if (eject_from_strong_cache(type, item) < 0) {
432 Py_DECREF(item);
433 break;
434 }
435
436 // Remove from weak cache
437 PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
438 Py_None, NULL);
439
440 Py_DECREF(item);
441 if (tmp == NULL) {
442 break;
443 }
444 Py_DECREF(tmp);
445 }
446 Py_DECREF(iter);
447 Py_DECREF(pop);
448 }
449
450 if (PyErr_Occurred()) {
451 return NULL;
452 }
453
454 Py_RETURN_NONE;
455 }
456
457 static PyObject *
zoneinfo_utcoffset(PyObject * self,PyObject * dt)458 zoneinfo_utcoffset(PyObject *self, PyObject *dt)
459 {
460 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
461 if (tti == NULL) {
462 return NULL;
463 }
464 Py_INCREF(tti->utcoff);
465 return tti->utcoff;
466 }
467
468 static PyObject *
zoneinfo_dst(PyObject * self,PyObject * dt)469 zoneinfo_dst(PyObject *self, PyObject *dt)
470 {
471 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
472 if (tti == NULL) {
473 return NULL;
474 }
475 Py_INCREF(tti->dstoff);
476 return tti->dstoff;
477 }
478
479 static PyObject *
zoneinfo_tzname(PyObject * self,PyObject * dt)480 zoneinfo_tzname(PyObject *self, PyObject *dt)
481 {
482 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
483 if (tti == NULL) {
484 return NULL;
485 }
486 Py_INCREF(tti->tzname);
487 return tti->tzname;
488 }
489
490 #define HASTZINFO(p) (((_PyDateTime_BaseTZInfo *)(p))->hastzinfo)
491 #define GET_DT_TZINFO(p) \
492 (HASTZINFO(p) ? ((PyDateTime_DateTime *)(p))->tzinfo : Py_None)
493
494 static PyObject *
zoneinfo_fromutc(PyObject * obj_self,PyObject * dt)495 zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
496 {
497 if (!PyDateTime_Check(dt)) {
498 PyErr_SetString(PyExc_TypeError,
499 "fromutc: argument must be a datetime");
500 return NULL;
501 }
502 if (GET_DT_TZINFO(dt) != obj_self) {
503 PyErr_SetString(PyExc_ValueError,
504 "fromutc: dt.tzinfo "
505 "is not self");
506 return NULL;
507 }
508
509 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
510
511 int64_t timestamp;
512 if (get_local_timestamp(dt, ×tamp)) {
513 return NULL;
514 }
515 size_t num_trans = self->num_transitions;
516
517 _ttinfo *tti = NULL;
518 unsigned char fold = 0;
519
520 if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
521 tti = self->ttinfo_before;
522 }
523 else if (num_trans == 0 ||
524 timestamp > self->trans_list_utc[num_trans - 1]) {
525 tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
526 PyDateTime_GET_YEAR(dt), &fold);
527
528 // Immediately after the last manual transition, the fold/gap is
529 // between self->trans_ttinfos[num_transitions - 1] and whatever
530 // ttinfo applies immediately after the last transition, not between
531 // the STD and DST rules in the tzrule_after, so we may need to
532 // adjust the fold value.
533 if (num_trans) {
534 _ttinfo *tti_prev = NULL;
535 if (num_trans == 1) {
536 tti_prev = self->ttinfo_before;
537 }
538 else {
539 tti_prev = self->trans_ttinfos[num_trans - 2];
540 }
541 int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
542 if (diff > 0 &&
543 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
544 fold = 1;
545 }
546 }
547 }
548 else {
549 size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
550 _ttinfo *tti_prev = NULL;
551
552 if (idx >= 2) {
553 tti_prev = self->trans_ttinfos[idx - 2];
554 tti = self->trans_ttinfos[idx - 1];
555 }
556 else {
557 tti_prev = self->ttinfo_before;
558 tti = self->trans_ttinfos[0];
559 }
560
561 // Detect fold
562 int64_t shift =
563 (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
564 if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
565 fold = 1;
566 }
567 }
568
569 PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
570 if (tmp == NULL) {
571 return NULL;
572 }
573
574 if (fold) {
575 if (PyDateTime_CheckExact(tmp)) {
576 ((PyDateTime_DateTime *)tmp)->fold = 1;
577 dt = tmp;
578 }
579 else {
580 PyObject *replace = PyObject_GetAttrString(tmp, "replace");
581 PyObject *args = PyTuple_New(0);
582 PyObject *kwargs = PyDict_New();
583
584 Py_DECREF(tmp);
585 if (args == NULL || kwargs == NULL || replace == NULL) {
586 Py_XDECREF(args);
587 Py_XDECREF(kwargs);
588 Py_XDECREF(replace);
589 return NULL;
590 }
591
592 dt = NULL;
593 if (!PyDict_SetItemString(kwargs, "fold", _PyLong_One)) {
594 dt = PyObject_Call(replace, args, kwargs);
595 }
596
597 Py_DECREF(args);
598 Py_DECREF(kwargs);
599 Py_DECREF(replace);
600
601 if (dt == NULL) {
602 return NULL;
603 }
604 }
605 }
606 else {
607 dt = tmp;
608 }
609 return dt;
610 }
611
612 static PyObject *
zoneinfo_repr(PyZoneInfo_ZoneInfo * self)613 zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
614 {
615 PyObject *rv = NULL;
616 const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
617 if (!(self->key == Py_None)) {
618 rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
619 }
620 else {
621 assert(PyUnicode_Check(self->file_repr));
622 rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
623 self->file_repr);
624 }
625
626 return rv;
627 }
628
629 static PyObject *
zoneinfo_str(PyZoneInfo_ZoneInfo * self)630 zoneinfo_str(PyZoneInfo_ZoneInfo *self)
631 {
632 if (!(self->key == Py_None)) {
633 Py_INCREF(self->key);
634 return self->key;
635 }
636 else {
637 return zoneinfo_repr(self);
638 }
639 }
640
641 /* Pickles the ZoneInfo object by key and source.
642 *
643 * ZoneInfo objects are pickled by reference to the TZif file that they came
644 * from, which means that the exact transitions may be different or the file
645 * may not un-pickle if the data has changed on disk in the interim.
646 *
647 * It is necessary to include a bit indicating whether or not the object
648 * was constructed from the cache, because from-cache objects will hit the
649 * unpickling process's cache, whereas no-cache objects will bypass it.
650 *
651 * Objects constructed from ZoneInfo.from_file cannot be pickled.
652 */
653 static PyObject *
zoneinfo_reduce(PyObject * obj_self,PyObject * unused)654 zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
655 {
656 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
657 if (self->source == SOURCE_FILE) {
658 // Objects constructed from files cannot be pickled.
659 PyObject *pickle = PyImport_ImportModule("pickle");
660 if (pickle == NULL) {
661 return NULL;
662 }
663
664 PyObject *pickle_error =
665 PyObject_GetAttrString(pickle, "PicklingError");
666 Py_DECREF(pickle);
667 if (pickle_error == NULL) {
668 return NULL;
669 }
670
671 PyErr_Format(pickle_error,
672 "Cannot pickle a ZoneInfo file from a file stream.");
673 Py_DECREF(pickle_error);
674 return NULL;
675 }
676
677 unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
678 PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
679
680 if (constructor == NULL) {
681 return NULL;
682 }
683
684 PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
685 Py_DECREF(constructor);
686 return rv;
687 }
688
689 static PyObject *
zoneinfo__unpickle(PyTypeObject * cls,PyObject * args)690 zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
691 {
692 PyObject *key;
693 unsigned char from_cache;
694 if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
695 return NULL;
696 }
697
698 if (from_cache) {
699 PyObject *val_args = Py_BuildValue("(O)", key);
700 if (val_args == NULL) {
701 return NULL;
702 }
703
704 PyObject *rv = zoneinfo_new(cls, val_args, NULL);
705
706 Py_DECREF(val_args);
707 return rv;
708 }
709 else {
710 return zoneinfo_new_instance(cls, key);
711 }
712 }
713
714 /* It is relatively expensive to construct new timedelta objects, and in most
715 * cases we're looking at a relatively small number of timedeltas, such as
716 * integer number of hours, etc. We will keep a cache so that we construct
717 * a minimal number of these.
718 *
719 * Possibly this should be replaced with an LRU cache so that it's not possible
720 * for the memory usage to explode from this, but in order for this to be a
721 * serious problem, one would need to deliberately craft a malicious time zone
722 * file with many distinct offsets. As of tzdb 2019c, loading every single zone
723 * fills the cache with ~450 timedeltas for a total size of ~12kB.
724 *
725 * This returns a new reference to the timedelta.
726 */
727 static PyObject *
load_timedelta(long seconds)728 load_timedelta(long seconds)
729 {
730 PyObject *rv = NULL;
731 PyObject *pyoffset = PyLong_FromLong(seconds);
732 if (pyoffset == NULL) {
733 return NULL;
734 }
735 int contains = PyDict_Contains(TIMEDELTA_CACHE, pyoffset);
736 if (contains == -1) {
737 goto error;
738 }
739
740 if (!contains) {
741 PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
742 0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
743
744 if (tmp == NULL) {
745 goto error;
746 }
747
748 rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
749 Py_DECREF(tmp);
750 }
751 else {
752 rv = PyDict_GetItem(TIMEDELTA_CACHE, pyoffset);
753 }
754
755 Py_DECREF(pyoffset);
756 Py_INCREF(rv);
757 return rv;
758 error:
759 Py_DECREF(pyoffset);
760 return NULL;
761 }
762
763 /* Constructor for _ttinfo object - this starts by initializing the _ttinfo
764 * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
765 * initialized _ttinfo objects.
766 */
767 static int
build_ttinfo(long utcoffset,long dstoffset,PyObject * tzname,_ttinfo * out)768 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
769 {
770 out->utcoff = NULL;
771 out->dstoff = NULL;
772 out->tzname = NULL;
773
774 out->utcoff_seconds = utcoffset;
775 out->utcoff = load_timedelta(utcoffset);
776 if (out->utcoff == NULL) {
777 return -1;
778 }
779
780 out->dstoff = load_timedelta(dstoffset);
781 if (out->dstoff == NULL) {
782 return -1;
783 }
784
785 out->tzname = tzname;
786 Py_INCREF(tzname);
787
788 return 0;
789 }
790
791 /* Decrease reference count on any non-NULL members of a _ttinfo */
792 static void
xdecref_ttinfo(_ttinfo * ttinfo)793 xdecref_ttinfo(_ttinfo *ttinfo)
794 {
795 if (ttinfo != NULL) {
796 Py_XDECREF(ttinfo->utcoff);
797 Py_XDECREF(ttinfo->dstoff);
798 Py_XDECREF(ttinfo->tzname);
799 }
800 }
801
802 /* Equality function for _ttinfo. */
803 static int
ttinfo_eq(const _ttinfo * const tti0,const _ttinfo * const tti1)804 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
805 {
806 int rv;
807 if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
808 1) {
809 goto end;
810 }
811
812 if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
813 1) {
814 goto end;
815 }
816
817 if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
818 1) {
819 goto end;
820 }
821 end:
822 return rv;
823 }
824
825 /* Given a file-like object, this populates a ZoneInfo object
826 *
827 * The current version calls into a Python function to read the data from
828 * file into Python objects, and this translates those Python objects into
829 * C values and calculates derived values (e.g. dstoff) in C.
830 *
831 * This returns 0 on success and -1 on failure.
832 *
833 * The function will never return while `self` is partially initialized —
834 * the object only needs to be freed / deallocated if this succeeds.
835 */
836 static int
load_data(PyZoneInfo_ZoneInfo * self,PyObject * file_obj)837 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
838 {
839 PyObject *data_tuple = NULL;
840
841 long *utcoff = NULL;
842 long *dstoff = NULL;
843 size_t *trans_idx = NULL;
844 unsigned char *isdst = NULL;
845
846 self->trans_list_utc = NULL;
847 self->trans_list_wall[0] = NULL;
848 self->trans_list_wall[1] = NULL;
849 self->trans_ttinfos = NULL;
850 self->_ttinfos = NULL;
851 self->file_repr = NULL;
852
853 size_t ttinfos_allocated = 0;
854
855 data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
856
857 if (data_tuple == NULL) {
858 goto error;
859 }
860
861 if (!PyTuple_CheckExact(data_tuple)) {
862 PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
863 data_tuple);
864 goto error;
865 }
866
867 // Unpack the data tuple
868 PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
869 if (trans_idx_list == NULL) {
870 goto error;
871 }
872
873 PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
874 if (trans_utc == NULL) {
875 goto error;
876 }
877
878 PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
879 if (utcoff_list == NULL) {
880 goto error;
881 }
882
883 PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
884 if (isdst_list == NULL) {
885 goto error;
886 }
887
888 PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
889 if (abbr == NULL) {
890 goto error;
891 }
892
893 PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
894 if (tz_str == NULL) {
895 goto error;
896 }
897
898 // Load the relevant sizes
899 Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
900 if (num_transitions < 0) {
901 goto error;
902 }
903
904 Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
905 if (num_ttinfos < 0) {
906 goto error;
907 }
908
909 self->num_transitions = (size_t)num_transitions;
910 self->num_ttinfos = (size_t)num_ttinfos;
911
912 // Load the transition indices and list
913 self->trans_list_utc =
914 PyMem_Malloc(self->num_transitions * sizeof(int64_t));
915 if (self->trans_list_utc == NULL) {
916 goto error;
917 }
918 trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
919 if (trans_idx == NULL) {
920 goto error;
921 }
922
923 for (size_t i = 0; i < self->num_transitions; ++i) {
924 PyObject *num = PyTuple_GetItem(trans_utc, i);
925 if (num == NULL) {
926 goto error;
927 }
928 self->trans_list_utc[i] = PyLong_AsLongLong(num);
929 if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
930 goto error;
931 }
932
933 num = PyTuple_GetItem(trans_idx_list, i);
934 if (num == NULL) {
935 goto error;
936 }
937
938 Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
939 if (cur_trans_idx == -1) {
940 goto error;
941 }
942
943 trans_idx[i] = (size_t)cur_trans_idx;
944 if (trans_idx[i] > self->num_ttinfos) {
945 PyErr_Format(
946 PyExc_ValueError,
947 "Invalid transition index found while reading TZif: %zd",
948 cur_trans_idx);
949
950 goto error;
951 }
952 }
953
954 // Load UTC offsets and isdst (size num_ttinfos)
955 utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
956 isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
957
958 if (utcoff == NULL || isdst == NULL) {
959 goto error;
960 }
961 for (size_t i = 0; i < self->num_ttinfos; ++i) {
962 PyObject *num = PyTuple_GetItem(utcoff_list, i);
963 if (num == NULL) {
964 goto error;
965 }
966
967 utcoff[i] = PyLong_AsLong(num);
968 if (utcoff[i] == -1 && PyErr_Occurred()) {
969 goto error;
970 }
971
972 num = PyTuple_GetItem(isdst_list, i);
973 if (num == NULL) {
974 goto error;
975 }
976
977 int isdst_with_error = PyObject_IsTrue(num);
978 if (isdst_with_error == -1) {
979 goto error;
980 }
981 else {
982 isdst[i] = (unsigned char)isdst_with_error;
983 }
984 }
985
986 dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
987 if (dstoff == NULL) {
988 goto error;
989 }
990
991 // Derive dstoff and trans_list_wall from the information we've loaded
992 utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
993 self->num_ttinfos);
994
995 if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
996 self->trans_list_wall, self->num_ttinfos,
997 self->num_transitions)) {
998 goto error;
999 }
1000
1001 // Build _ttinfo objects from utcoff, dstoff and abbr
1002 self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
1003 if (self->_ttinfos == NULL) {
1004 goto error;
1005 }
1006 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1007 PyObject *tzname = PyTuple_GetItem(abbr, i);
1008 if (tzname == NULL) {
1009 goto error;
1010 }
1011
1012 ttinfos_allocated++;
1013 if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
1014 goto error;
1015 }
1016 }
1017
1018 // Build our mapping from transition to the ttinfo that applies
1019 self->trans_ttinfos =
1020 PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1021 if (self->trans_ttinfos == NULL) {
1022 goto error;
1023 }
1024 for (size_t i = 0; i < self->num_transitions; ++i) {
1025 size_t ttinfo_idx = trans_idx[i];
1026 assert(ttinfo_idx < self->num_ttinfos);
1027 self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1028 }
1029
1030 // Set ttinfo_before to the first non-DST transition
1031 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1032 if (!isdst[i]) {
1033 self->ttinfo_before = &(self->_ttinfos[i]);
1034 break;
1035 }
1036 }
1037
1038 // If there are only DST ttinfos, pick the first one, if there are no
1039 // ttinfos at all, set ttinfo_before to NULL
1040 if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1041 self->ttinfo_before = &(self->_ttinfos[0]);
1042 }
1043
1044 if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1045 if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1046 goto error;
1047 }
1048 }
1049 else {
1050 if (!self->num_ttinfos) {
1051 PyErr_Format(PyExc_ValueError, "No time zone information found.");
1052 goto error;
1053 }
1054
1055 size_t idx;
1056 if (!self->num_transitions) {
1057 idx = self->num_ttinfos - 1;
1058 }
1059 else {
1060 idx = trans_idx[self->num_transitions - 1];
1061 }
1062
1063 _ttinfo *tti = &(self->_ttinfos[idx]);
1064 build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1065 &(self->tzrule_after));
1066
1067 // We've abused the build_tzrule constructor to construct an STD-only
1068 // rule mimicking whatever ttinfo we've picked up, but it's possible
1069 // that the one we've picked up is a DST zone, so we need to make sure
1070 // that the dstoff is set correctly in that case.
1071 if (PyObject_IsTrue(tti->dstoff)) {
1072 _ttinfo *tti_after = &(self->tzrule_after.std);
1073 Py_DECREF(tti_after->dstoff);
1074 tti_after->dstoff = tti->dstoff;
1075 Py_INCREF(tti_after->dstoff);
1076 }
1077 }
1078
1079 // Determine if this is a "fixed offset" zone, meaning that the output of
1080 // the utcoffset, dst and tzname functions does not depend on the specific
1081 // datetime passed.
1082 //
1083 // We make three simplifying assumptions here:
1084 //
1085 // 1. If tzrule_after is not std_only, it has transitions that might occur
1086 // (it is possible to construct TZ strings that specify STD and DST but
1087 // no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1088 // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1089 // represent different offsets.
1090 // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1091 // fixed-offset zone with extra _ttinfos defined may appear to *not* be
1092 // a fixed offset zone).
1093 //
1094 // Violations to these assumptions would be fairly exotic, and exotic
1095 // zones should almost certainly not be used with datetime.time (the
1096 // only thing that would be affected by this).
1097 if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1098 self->fixed_offset = 0;
1099 }
1100 else if (self->num_ttinfos == 0) {
1101 self->fixed_offset = 1;
1102 }
1103 else {
1104 int constant_offset =
1105 ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1106 if (constant_offset < 0) {
1107 goto error;
1108 }
1109 else {
1110 self->fixed_offset = constant_offset;
1111 }
1112 }
1113
1114 int rv = 0;
1115 goto cleanup;
1116 error:
1117 // These resources only need to be freed if we have failed, if we succeed
1118 // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1119 // method to free the relevant resources.
1120 if (self->trans_list_utc != NULL) {
1121 PyMem_Free(self->trans_list_utc);
1122 self->trans_list_utc = NULL;
1123 }
1124
1125 for (size_t i = 0; i < 2; ++i) {
1126 if (self->trans_list_wall[i] != NULL) {
1127 PyMem_Free(self->trans_list_wall[i]);
1128 self->trans_list_wall[i] = NULL;
1129 }
1130 }
1131
1132 if (self->_ttinfos != NULL) {
1133 for (size_t i = 0; i < ttinfos_allocated; ++i) {
1134 xdecref_ttinfo(&(self->_ttinfos[i]));
1135 }
1136 PyMem_Free(self->_ttinfos);
1137 self->_ttinfos = NULL;
1138 }
1139
1140 if (self->trans_ttinfos != NULL) {
1141 PyMem_Free(self->trans_ttinfos);
1142 self->trans_ttinfos = NULL;
1143 }
1144
1145 rv = -1;
1146 cleanup:
1147 Py_XDECREF(data_tuple);
1148
1149 if (utcoff != NULL) {
1150 PyMem_Free(utcoff);
1151 }
1152
1153 if (dstoff != NULL) {
1154 PyMem_Free(dstoff);
1155 }
1156
1157 if (isdst != NULL) {
1158 PyMem_Free(isdst);
1159 }
1160
1161 if (trans_idx != NULL) {
1162 PyMem_Free(trans_idx);
1163 }
1164
1165 return rv;
1166 }
1167
1168 /* Function to calculate the local timestamp of a transition from the year. */
1169 int64_t
calendarrule_year_to_timestamp(TransitionRuleType * base_self,int year)1170 calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1171 {
1172 CalendarRule *self = (CalendarRule *)base_self;
1173
1174 // We want (year, month, day of month); we have year and month, but we
1175 // need to turn (week, day-of-week) into day-of-month
1176 //
1177 // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1178 // Week 5 represents the last occurrence of day `day`, so we need to know
1179 // the first weekday of the month and the number of days in the month.
1180 int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1181 uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1182 if (self->month == 2 && is_leap_year(year)) {
1183 days_in_month += 1;
1184 }
1185
1186 // This equation seems magical, so I'll break it down:
1187 // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1188 // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1189 // because this math is mod 7
1190 // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1191 // numbers so that -1 % 7 = 6).
1192 // 3. Add 1 because month days are a 1-based index.
1193 int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1194 if (month_day < 0) {
1195 month_day += 7;
1196 }
1197 month_day += 1;
1198
1199 // Now use a 0-based index version of `week` to calculate the w-th
1200 // occurrence of `day`
1201 month_day += ((int8_t)(self->week) - 1) * 7;
1202
1203 // month_day will only be > days_in_month if w was 5, and `w` means "last
1204 // occurrence of `d`", so now we just check if we over-shot the end of the
1205 // month and if so knock off 1 week.
1206 if (month_day > days_in_month) {
1207 month_day -= 7;
1208 }
1209
1210 int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1211 return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1212 (int64_t)(self->minute * 60) + (int64_t)(self->second));
1213 }
1214
1215 /* Constructor for CalendarRule. */
1216 int
calendarrule_new(uint8_t month,uint8_t week,uint8_t day,int8_t hour,int8_t minute,int8_t second,CalendarRule * out)1217 calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1218 int8_t minute, int8_t second, CalendarRule *out)
1219 {
1220 // These bounds come from the POSIX standard, which describes an Mm.n.d
1221 // rule as:
1222 //
1223 // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1224 // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1225 // may occur in either the fourth or the fifth week). Week 1 is the first
1226 // week in which the d'th day occurs. Day zero is Sunday.
1227 if (month <= 0 || month > 12) {
1228 PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1229 return -1;
1230 }
1231
1232 if (week <= 0 || week > 5) {
1233 PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1234 return -1;
1235 }
1236
1237 // If the 'day' parameter type is changed to a signed type,
1238 // "day < 0" check must be added.
1239 if (/* day < 0 || */ day > 6) {
1240 PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1241 return -1;
1242 }
1243
1244 TransitionRuleType base = {&calendarrule_year_to_timestamp};
1245
1246 CalendarRule new_offset = {
1247 .base = base,
1248 .month = month,
1249 .week = week,
1250 .day = day,
1251 .hour = hour,
1252 .minute = minute,
1253 .second = second,
1254 };
1255
1256 *out = new_offset;
1257 return 0;
1258 }
1259
1260 /* Function to calculate the local timestamp of a transition from the year.
1261 *
1262 * This translates the day of the year into a local timestamp — either a
1263 * 1-based Julian day, not including leap days, or the 0-based year-day,
1264 * including leap days.
1265 * */
1266 int64_t
dayrule_year_to_timestamp(TransitionRuleType * base_self,int year)1267 dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1268 {
1269 // The function signature requires a TransitionRuleType pointer, but this
1270 // function is only applicable to DayRule* objects.
1271 DayRule *self = (DayRule *)base_self;
1272
1273 // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1274 // to know the number of days since 1970-01-01, so we must subtract off
1275 // the equivalent of ymd_to_ord(1970, 1, 1).
1276 //
1277 // We subtract off an additional 1 day to account for January 1st (we want
1278 // the number of full days *before* the date of the transition - partial
1279 // days are accounted for in the hour, minute and second portions.
1280 int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1281
1282 // The Julian day specification skips over February 29th in leap years,
1283 // from the POSIX standard:
1284 //
1285 // Leap days shall not be counted. That is, in all years-including leap
1286 // years-February 28 is day 59 and March 1 is day 60. It is impossible to
1287 // refer explicitly to the occasional February 29.
1288 //
1289 // This is actually more useful than you'd think — if you want a rule that
1290 // always transitions on a given calendar day (other than February 29th),
1291 // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1292 // always refers to December 31st.
1293 unsigned int day = self->day;
1294 if (self->julian && day >= 59 && is_leap_year(year)) {
1295 day += 1;
1296 }
1297
1298 return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1299 (self->minute * 60) + self->second;
1300 }
1301
1302 /* Constructor for DayRule. */
1303 static int
dayrule_new(uint8_t julian,unsigned int day,int8_t hour,int8_t minute,int8_t second,DayRule * out)1304 dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1305 int8_t second, DayRule *out)
1306 {
1307 // The POSIX standard specifies that Julian days must be in the range (1 <=
1308 // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1309 // be in the range (0 <= n <= 365).
1310 if (day < julian || day > 365) {
1311 PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1312 julian, day);
1313 return -1;
1314 }
1315
1316 TransitionRuleType base = {
1317 &dayrule_year_to_timestamp,
1318 };
1319
1320 DayRule tmp = {
1321 .base = base,
1322 .julian = julian,
1323 .day = day,
1324 .hour = hour,
1325 .minute = minute,
1326 .second = second,
1327 };
1328
1329 *out = tmp;
1330
1331 return 0;
1332 }
1333
1334 /* Calculate the start and end rules for a _tzrule in the given year. */
1335 static void
tzrule_transitions(_tzrule * rule,int year,int64_t * start,int64_t * end)1336 tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1337 {
1338 assert(rule->start != NULL);
1339 assert(rule->end != NULL);
1340 *start = rule->start->year_to_timestamp(rule->start, year);
1341 *end = rule->end->year_to_timestamp(rule->end, year);
1342 }
1343
1344 /* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1345 *
1346 * This takes a local timestamp and fold for disambiguation purposes; the year
1347 * could technically be calculated from the timestamp, but given that the
1348 * callers of this function already have the year information accessible from
1349 * the datetime struct, it is taken as an additional parameter to reduce
1350 * unncessary calculation.
1351 * */
1352 static _ttinfo *
find_tzrule_ttinfo(_tzrule * rule,int64_t ts,unsigned char fold,int year)1353 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1354 {
1355 if (rule->std_only) {
1356 return &(rule->std);
1357 }
1358
1359 int64_t start, end;
1360 uint8_t isdst;
1361
1362 tzrule_transitions(rule, year, &start, &end);
1363
1364 // With fold = 0, the period (denominated in local time) with the smaller
1365 // offset starts at the end of the gap and ends at the end of the fold;
1366 // with fold = 1, it runs from the start of the gap to the beginning of the
1367 // fold.
1368 //
1369 // So in order to determine the DST boundaries we need to know both the
1370 // fold and whether DST is positive or negative (rare), and it turns out
1371 // that this boils down to fold XOR is_positive.
1372 if (fold == (rule->dst_diff >= 0)) {
1373 end -= rule->dst_diff;
1374 }
1375 else {
1376 start += rule->dst_diff;
1377 }
1378
1379 if (start < end) {
1380 isdst = (ts >= start) && (ts < end);
1381 }
1382 else {
1383 isdst = (ts < end) || (ts >= start);
1384 }
1385
1386 if (isdst) {
1387 return &(rule->dst);
1388 }
1389 else {
1390 return &(rule->std);
1391 }
1392 }
1393
1394 /* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1395 *
1396 * This function can determine the _ttinfo that applies at a given epoch time,
1397 * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1398 * This is to be used in the .fromutc() function.
1399 *
1400 * The year is technically a redundant parameter, because it can be calculated
1401 * from the timestamp, but all callers of this function should have the year
1402 * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1403 * calculation.
1404 **/
1405 static _ttinfo *
find_tzrule_ttinfo_fromutc(_tzrule * rule,int64_t ts,int year,unsigned char * fold)1406 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1407 unsigned char *fold)
1408 {
1409 if (rule->std_only) {
1410 *fold = 0;
1411 return &(rule->std);
1412 }
1413
1414 int64_t start, end;
1415 uint8_t isdst;
1416 tzrule_transitions(rule, year, &start, &end);
1417 start -= rule->std.utcoff_seconds;
1418 end -= rule->dst.utcoff_seconds;
1419
1420 if (start < end) {
1421 isdst = (ts >= start) && (ts < end);
1422 }
1423 else {
1424 isdst = (ts < end) || (ts >= start);
1425 }
1426
1427 // For positive DST, the ambiguous period is one dst_diff after the end of
1428 // DST; for negative DST, the ambiguous period is one dst_diff before the
1429 // start of DST.
1430 int64_t ambig_start, ambig_end;
1431 if (rule->dst_diff > 0) {
1432 ambig_start = end;
1433 ambig_end = end + rule->dst_diff;
1434 }
1435 else {
1436 ambig_start = start;
1437 ambig_end = start - rule->dst_diff;
1438 }
1439
1440 *fold = (ts >= ambig_start) && (ts < ambig_end);
1441
1442 if (isdst) {
1443 return &(rule->dst);
1444 }
1445 else {
1446 return &(rule->std);
1447 }
1448 }
1449
1450 /* Parse a TZ string in the format specified by the POSIX standard:
1451 *
1452 * std offset[dst[offset],start[/time],end[/time]]
1453 *
1454 * std and dst must be 3 or more characters long and must not contain a
1455 * leading colon, embedded digits, commas, nor a plus or minus signs; The
1456 * spaces between "std" and "offset" are only for display and are not actually
1457 * present in the string.
1458 *
1459 * The format of the offset is ``[+|-]hh[:mm[:ss]]``
1460 *
1461 * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1462 *
1463 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1464 */
1465 static int
parse_tz_str(PyObject * tz_str_obj,_tzrule * out)1466 parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1467 {
1468 PyObject *std_abbr = NULL;
1469 PyObject *dst_abbr = NULL;
1470 TransitionRuleType *start = NULL;
1471 TransitionRuleType *end = NULL;
1472 // Initialize offsets to invalid value (> 24 hours)
1473 long std_offset = 1 << 20;
1474 long dst_offset = 1 << 20;
1475
1476 char *tz_str = PyBytes_AsString(tz_str_obj);
1477 if (tz_str == NULL) {
1478 return -1;
1479 }
1480 char *p = tz_str;
1481
1482 // Read the `std` abbreviation, which must be at least 3 characters long.
1483 Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
1484 if (num_chars < 1) {
1485 PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1486 goto error;
1487 }
1488
1489 p += num_chars;
1490
1491 // Now read the STD offset, which is required
1492 num_chars = parse_tz_delta(p, &std_offset);
1493 if (num_chars < 0) {
1494 PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1495 goto error;
1496 }
1497 p += num_chars;
1498
1499 // If the string ends here, there is no DST, otherwise we must parse the
1500 // DST abbreviation and start and end dates and times.
1501 if (*p == '\0') {
1502 goto complete;
1503 }
1504
1505 num_chars = parse_abbr(p, &dst_abbr);
1506 if (num_chars < 1) {
1507 PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1508 goto error;
1509 }
1510 p += num_chars;
1511
1512 if (*p == ',') {
1513 // From the POSIX standard:
1514 //
1515 // If no offset follows dst, the alternative time is assumed to be one
1516 // hour ahead of standard time.
1517 dst_offset = std_offset + 3600;
1518 }
1519 else {
1520 num_chars = parse_tz_delta(p, &dst_offset);
1521 if (num_chars < 0) {
1522 PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1523 tz_str_obj);
1524 goto error;
1525 }
1526
1527 p += num_chars;
1528 }
1529
1530 TransitionRuleType **transitions[2] = {&start, &end};
1531 for (size_t i = 0; i < 2; ++i) {
1532 if (*p != ',') {
1533 PyErr_Format(PyExc_ValueError,
1534 "Missing transition rules in TZ string: %R",
1535 tz_str_obj);
1536 goto error;
1537 }
1538 p++;
1539
1540 num_chars = parse_transition_rule(p, transitions[i]);
1541 if (num_chars < 0) {
1542 PyErr_Format(PyExc_ValueError,
1543 "Malformed transition rule in TZ string: %R",
1544 tz_str_obj);
1545 goto error;
1546 }
1547 p += num_chars;
1548 }
1549
1550 if (*p != '\0') {
1551 PyErr_Format(PyExc_ValueError,
1552 "Extraneous characters at end of TZ string: %R",
1553 tz_str_obj);
1554 goto error;
1555 }
1556
1557 complete:
1558 build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1559 Py_DECREF(std_abbr);
1560 Py_XDECREF(dst_abbr);
1561
1562 return 0;
1563 error:
1564 Py_XDECREF(std_abbr);
1565 if (dst_abbr != NULL && dst_abbr != Py_None) {
1566 Py_DECREF(dst_abbr);
1567 }
1568
1569 if (start != NULL) {
1570 PyMem_Free(start);
1571 }
1572
1573 if (end != NULL) {
1574 PyMem_Free(end);
1575 }
1576
1577 return -1;
1578 }
1579
1580 static int
parse_uint(const char * const p,uint8_t * value)1581 parse_uint(const char *const p, uint8_t *value)
1582 {
1583 if (!isdigit(*p)) {
1584 return -1;
1585 }
1586
1587 *value = (*p) - '0';
1588 return 0;
1589 }
1590
1591 /* Parse the STD and DST abbreviations from a TZ string. */
1592 static Py_ssize_t
parse_abbr(const char * const p,PyObject ** abbr)1593 parse_abbr(const char *const p, PyObject **abbr)
1594 {
1595 const char *ptr = p;
1596 char buff = *ptr;
1597 const char *str_start;
1598 const char *str_end;
1599
1600 if (*ptr == '<') {
1601 ptr++;
1602 str_start = ptr;
1603 while ((buff = *ptr) != '>') {
1604 // From the POSIX standard:
1605 //
1606 // In the quoted form, the first character shall be the less-than
1607 // ( '<' ) character and the last character shall be the
1608 // greater-than ( '>' ) character. All characters between these
1609 // quoting characters shall be alphanumeric characters from the
1610 // portable character set in the current locale, the plus-sign (
1611 // '+' ) character, or the minus-sign ( '-' ) character. The std
1612 // and dst fields in this case shall not include the quoting
1613 // characters.
1614 if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1615 buff != '-') {
1616 return -1;
1617 }
1618 ptr++;
1619 }
1620 str_end = ptr;
1621 ptr++;
1622 }
1623 else {
1624 str_start = p;
1625 // From the POSIX standard:
1626 //
1627 // In the unquoted form, all characters in these fields shall be
1628 // alphabetic characters from the portable character set in the
1629 // current locale.
1630 while (isalpha(*ptr)) {
1631 ptr++;
1632 }
1633 str_end = ptr;
1634 }
1635
1636 *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
1637 if (*abbr == NULL) {
1638 return -1;
1639 }
1640
1641 return ptr - p;
1642 }
1643
1644 /* Parse a UTC offset from a TZ str. */
1645 static Py_ssize_t
parse_tz_delta(const char * const p,long * total_seconds)1646 parse_tz_delta(const char *const p, long *total_seconds)
1647 {
1648 // From the POSIX spec:
1649 //
1650 // Indicates the value added to the local time to arrive at Coordinated
1651 // Universal Time. The offset has the form:
1652 //
1653 // hh[:mm[:ss]]
1654 //
1655 // One or more digits may be used; the value is always interpreted as a
1656 // decimal number.
1657 //
1658 // The POSIX spec says that the values for `hour` must be between 0 and 24
1659 // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1660 // transition times may be signed and range from -167 to 167.
1661 long sign = -1;
1662 long hours = 0;
1663 long minutes = 0;
1664 long seconds = 0;
1665
1666 const char *ptr = p;
1667 char buff = *ptr;
1668 if (buff == '-' || buff == '+') {
1669 // Negative numbers correspond to *positive* offsets, from the spec:
1670 //
1671 // If preceded by a '-', the timezone shall be east of the Prime
1672 // Meridian; otherwise, it shall be west (which may be indicated by
1673 // an optional preceding '+' ).
1674 if (buff == '-') {
1675 sign = 1;
1676 }
1677
1678 ptr++;
1679 }
1680
1681 // The hour can be 1 or 2 numeric characters
1682 for (size_t i = 0; i < 2; ++i) {
1683 buff = *ptr;
1684 if (!isdigit(buff)) {
1685 if (i == 0) {
1686 return -1;
1687 }
1688 else {
1689 break;
1690 }
1691 }
1692
1693 hours *= 10;
1694 hours += buff - '0';
1695 ptr++;
1696 }
1697
1698 if (hours > 24 || hours < 0) {
1699 return -1;
1700 }
1701
1702 // Minutes and seconds always of the format ":dd"
1703 long *outputs[2] = {&minutes, &seconds};
1704 for (size_t i = 0; i < 2; ++i) {
1705 if (*ptr != ':') {
1706 goto complete;
1707 }
1708 ptr++;
1709
1710 for (size_t j = 0; j < 2; ++j) {
1711 buff = *ptr;
1712 if (!isdigit(buff)) {
1713 return -1;
1714 }
1715 *(outputs[i]) *= 10;
1716 *(outputs[i]) += buff - '0';
1717 ptr++;
1718 }
1719 }
1720
1721 complete:
1722 *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1723
1724 return ptr - p;
1725 }
1726
1727 /* Parse the date portion of a transition rule. */
1728 static Py_ssize_t
parse_transition_rule(const char * const p,TransitionRuleType ** out)1729 parse_transition_rule(const char *const p, TransitionRuleType **out)
1730 {
1731 // The full transition rule indicates when to change back and forth between
1732 // STD and DST, and has the form:
1733 //
1734 // date[/time],date[/time]
1735 //
1736 // This function parses an individual date[/time] section, and returns
1737 // the number of characters that contributed to the transition rule. This
1738 // does not include the ',' at the end of the first rule.
1739 //
1740 // The POSIX spec states that if *time* is not given, the default is 02:00.
1741 const char *ptr = p;
1742 int8_t hour = 2;
1743 int8_t minute = 0;
1744 int8_t second = 0;
1745
1746 // Rules come in one of three flavors:
1747 //
1748 // 1. Jn: Julian day n, with no leap days.
1749 // 2. n: Day of year (0-based, with leap days)
1750 // 3. Mm.n.d: Specifying by month, week and day-of-week.
1751
1752 if (*ptr == 'M') {
1753 uint8_t month, week, day;
1754 ptr++;
1755 if (parse_uint(ptr, &month)) {
1756 return -1;
1757 }
1758 ptr++;
1759 if (*ptr != '.') {
1760 uint8_t tmp;
1761 if (parse_uint(ptr, &tmp)) {
1762 return -1;
1763 }
1764
1765 month *= 10;
1766 month += tmp;
1767 ptr++;
1768 }
1769
1770 uint8_t *values[2] = {&week, &day};
1771 for (size_t i = 0; i < 2; ++i) {
1772 if (*ptr != '.') {
1773 return -1;
1774 }
1775 ptr++;
1776
1777 if (parse_uint(ptr, values[i])) {
1778 return -1;
1779 }
1780 ptr++;
1781 }
1782
1783 if (*ptr == '/') {
1784 ptr++;
1785 Py_ssize_t num_chars =
1786 parse_transition_time(ptr, &hour, &minute, &second);
1787 if (num_chars < 0) {
1788 return -1;
1789 }
1790 ptr += num_chars;
1791 }
1792
1793 CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1794 if (rv == NULL) {
1795 return -1;
1796 }
1797
1798 if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1799 PyMem_Free(rv);
1800 return -1;
1801 }
1802
1803 *out = (TransitionRuleType *)rv;
1804 }
1805 else {
1806 uint8_t julian = 0;
1807 unsigned int day = 0;
1808 if (*ptr == 'J') {
1809 julian = 1;
1810 ptr++;
1811 }
1812
1813 for (size_t i = 0; i < 3; ++i) {
1814 if (!isdigit(*ptr)) {
1815 if (i == 0) {
1816 return -1;
1817 }
1818 break;
1819 }
1820 day *= 10;
1821 day += (*ptr) - '0';
1822 ptr++;
1823 }
1824
1825 if (*ptr == '/') {
1826 ptr++;
1827 Py_ssize_t num_chars =
1828 parse_transition_time(ptr, &hour, &minute, &second);
1829 if (num_chars < 0) {
1830 return -1;
1831 }
1832 ptr += num_chars;
1833 }
1834
1835 DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1836 if (rv == NULL) {
1837 return -1;
1838 }
1839
1840 if (dayrule_new(julian, day, hour, minute, second, rv)) {
1841 PyMem_Free(rv);
1842 return -1;
1843 }
1844 *out = (TransitionRuleType *)rv;
1845 }
1846
1847 return ptr - p;
1848 }
1849
1850 /* Parse the time portion of a transition rule (e.g. following an /) */
1851 static Py_ssize_t
parse_transition_time(const char * const p,int8_t * hour,int8_t * minute,int8_t * second)1852 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1853 int8_t *second)
1854 {
1855 // From the spec:
1856 //
1857 // The time has the same format as offset except that no leading sign
1858 // ( '-' or '+' ) is allowed.
1859 //
1860 // The format for the offset is:
1861 //
1862 // h[h][:mm[:ss]]
1863 //
1864 // RFC 8536 also allows transition times to be signed and to range from
1865 // -167 to +167, but the current version only supports [0, 99].
1866 //
1867 // TODO: Support the full range of transition hours.
1868 int8_t *components[3] = {hour, minute, second};
1869 const char *ptr = p;
1870 int8_t sign = 1;
1871
1872 if (*ptr == '-' || *ptr == '+') {
1873 if (*ptr == '-') {
1874 sign = -1;
1875 }
1876 ptr++;
1877 }
1878
1879 for (size_t i = 0; i < 3; ++i) {
1880 if (i > 0) {
1881 if (*ptr != ':') {
1882 break;
1883 }
1884 ptr++;
1885 }
1886
1887 uint8_t buff = 0;
1888 for (size_t j = 0; j < 2; j++) {
1889 if (!isdigit(*ptr)) {
1890 if (i == 0 && j > 0) {
1891 break;
1892 }
1893 return -1;
1894 }
1895
1896 buff *= 10;
1897 buff += (*ptr) - '0';
1898 ptr++;
1899 }
1900
1901 *(components[i]) = sign * buff;
1902 }
1903
1904 return ptr - p;
1905 }
1906
1907 /* Constructor for a _tzrule.
1908 *
1909 * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1910 * case `dst_offset` will be ignored and `start` and `end` are expected to be
1911 * NULL as well.
1912 *
1913 * Returns 0 on success.
1914 */
1915 static int
build_tzrule(PyObject * std_abbr,PyObject * dst_abbr,long std_offset,long dst_offset,TransitionRuleType * start,TransitionRuleType * end,_tzrule * out)1916 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1917 long dst_offset, TransitionRuleType *start,
1918 TransitionRuleType *end, _tzrule *out)
1919 {
1920 _tzrule rv = {{0}};
1921
1922 rv.start = start;
1923 rv.end = end;
1924
1925 if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1926 goto error;
1927 }
1928
1929 if (dst_abbr != NULL) {
1930 rv.dst_diff = dst_offset - std_offset;
1931 if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1932 goto error;
1933 }
1934 }
1935 else {
1936 rv.std_only = 1;
1937 }
1938
1939 *out = rv;
1940
1941 return 0;
1942 error:
1943 xdecref_ttinfo(&rv.std);
1944 xdecref_ttinfo(&rv.dst);
1945 return -1;
1946 }
1947
1948 /* Destructor for _tzrule. */
1949 static void
free_tzrule(_tzrule * tzrule)1950 free_tzrule(_tzrule *tzrule)
1951 {
1952 xdecref_ttinfo(&(tzrule->std));
1953 if (!tzrule->std_only) {
1954 xdecref_ttinfo(&(tzrule->dst));
1955 }
1956
1957 if (tzrule->start != NULL) {
1958 PyMem_Free(tzrule->start);
1959 }
1960
1961 if (tzrule->end != NULL) {
1962 PyMem_Free(tzrule->end);
1963 }
1964 }
1965
1966 /* Calculate DST offsets from transitions and UTC offsets
1967 *
1968 * This is necessary because each C `ttinfo` only contains the UTC offset,
1969 * time zone abbreviation and an isdst boolean - it does not include the
1970 * amount of the DST offset, but we need the amount for the dst() function.
1971 *
1972 * Thus function uses heuristics to infer what the offset should be, so it
1973 * is not guaranteed that this will work for all zones. If we cannot assign
1974 * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1975 * bool(dt.dst()) will always match ttinfo.isdst.
1976 */
1977 static void
utcoff_to_dstoff(size_t * trans_idx,long * utcoffs,long * dstoffs,unsigned char * isdsts,size_t num_transitions,size_t num_ttinfos)1978 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1979 unsigned char *isdsts, size_t num_transitions,
1980 size_t num_ttinfos)
1981 {
1982 size_t dst_count = 0;
1983 size_t dst_found = 0;
1984 for (size_t i = 0; i < num_ttinfos; ++i) {
1985 dst_count++;
1986 }
1987
1988 for (size_t i = 1; i < num_transitions; ++i) {
1989 if (dst_count == dst_found) {
1990 break;
1991 }
1992
1993 size_t idx = trans_idx[i];
1994 size_t comp_idx = trans_idx[i - 1];
1995
1996 // Only look at DST offsets that have nto been assigned already
1997 if (!isdsts[idx] || dstoffs[idx] != 0) {
1998 continue;
1999 }
2000
2001 long dstoff = 0;
2002 long utcoff = utcoffs[idx];
2003
2004 if (!isdsts[comp_idx]) {
2005 dstoff = utcoff - utcoffs[comp_idx];
2006 }
2007
2008 if (!dstoff && idx < (num_ttinfos - 1)) {
2009 comp_idx = trans_idx[i + 1];
2010
2011 // If the following transition is also DST and we couldn't find
2012 // the DST offset by this point, we're going to have to skip it
2013 // and hope this transition gets assigned later
2014 if (isdsts[comp_idx]) {
2015 continue;
2016 }
2017
2018 dstoff = utcoff - utcoffs[comp_idx];
2019 }
2020
2021 if (dstoff) {
2022 dst_found++;
2023 dstoffs[idx] = dstoff;
2024 }
2025 }
2026
2027 if (dst_found < dst_count) {
2028 // If there are time zones we didn't find a value for, we'll end up
2029 // with dstoff = 0 for something where isdst=1. This is obviously
2030 // wrong — one hour will be a much better guess than 0.
2031 for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2032 if (isdsts[idx] && !dstoffs[idx]) {
2033 dstoffs[idx] = 3600;
2034 }
2035 }
2036 }
2037 }
2038
2039 #define _swap(x, y, buffer) \
2040 buffer = x; \
2041 x = y; \
2042 y = buffer;
2043
2044 /* Calculate transitions in local time from UTC time and offsets.
2045 *
2046 * We want to know when each transition occurs, denominated in the number of
2047 * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2048 * *local time* (note: this is *not* equivalent to the output of
2049 * datetime.timestamp, which is the total number of seconds actual elapsed
2050 * since 1970-01-01T00:00:00Z in UTC).
2051 *
2052 * This is an ambiguous question because "local time" can be ambiguous — but it
2053 * is disambiguated by the `fold` parameter, so we allocate two arrays:
2054 *
2055 * trans_local[0]: The wall-time transitions for fold=0
2056 * trans_local[1]: The wall-time transitions for fold=1
2057 *
2058 * This returns 0 on success and a negative number of failure. The trans_local
2059 * arrays must be freed if they are not NULL.
2060 */
2061 static int
ts_to_local(size_t * trans_idx,int64_t * trans_utc,long * utcoff,int64_t * trans_local[2],size_t num_ttinfos,size_t num_transitions)2062 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2063 int64_t *trans_local[2], size_t num_ttinfos,
2064 size_t num_transitions)
2065 {
2066 if (num_transitions == 0) {
2067 return 0;
2068 }
2069
2070 // Copy the UTC transitions into each array to be modified in place later
2071 for (size_t i = 0; i < 2; ++i) {
2072 trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2073 if (trans_local[i] == NULL) {
2074 return -1;
2075 }
2076
2077 memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2078 }
2079
2080 int64_t offset_0, offset_1, buff;
2081 if (num_ttinfos > 1) {
2082 offset_0 = utcoff[0];
2083 offset_1 = utcoff[trans_idx[0]];
2084
2085 if (offset_1 > offset_0) {
2086 _swap(offset_0, offset_1, buff);
2087 }
2088 }
2089 else {
2090 offset_0 = utcoff[0];
2091 offset_1 = utcoff[0];
2092 }
2093
2094 trans_local[0][0] += offset_0;
2095 trans_local[1][0] += offset_1;
2096
2097 for (size_t i = 1; i < num_transitions; ++i) {
2098 offset_0 = utcoff[trans_idx[i - 1]];
2099 offset_1 = utcoff[trans_idx[i]];
2100
2101 if (offset_1 > offset_0) {
2102 _swap(offset_1, offset_0, buff);
2103 }
2104
2105 trans_local[0][i] += offset_0;
2106 trans_local[1][i] += offset_1;
2107 }
2108
2109 return 0;
2110 }
2111
2112 /* Simple bisect_right binary search implementation */
2113 static size_t
_bisect(const int64_t value,const int64_t * arr,size_t size)2114 _bisect(const int64_t value, const int64_t *arr, size_t size)
2115 {
2116 size_t lo = 0;
2117 size_t hi = size;
2118 size_t m;
2119
2120 while (lo < hi) {
2121 m = (lo + hi) / 2;
2122 if (arr[m] > value) {
2123 hi = m;
2124 }
2125 else {
2126 lo = m + 1;
2127 }
2128 }
2129
2130 return hi;
2131 }
2132
2133 /* Find the ttinfo rules that apply at a given local datetime. */
2134 static _ttinfo *
find_ttinfo(PyZoneInfo_ZoneInfo * self,PyObject * dt)2135 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2136 {
2137 // datetime.time has a .tzinfo attribute that passes None as the dt
2138 // argument; it only really has meaning for fixed-offset zones.
2139 if (dt == Py_None) {
2140 if (self->fixed_offset) {
2141 return &(self->tzrule_after.std);
2142 }
2143 else {
2144 return &NO_TTINFO;
2145 }
2146 }
2147
2148 int64_t ts;
2149 if (get_local_timestamp(dt, &ts)) {
2150 return NULL;
2151 }
2152
2153 unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2154 assert(fold < 2);
2155 int64_t *local_transitions = self->trans_list_wall[fold];
2156 size_t num_trans = self->num_transitions;
2157
2158 if (num_trans && ts < local_transitions[0]) {
2159 return self->ttinfo_before;
2160 }
2161 else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2162 return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2163 PyDateTime_GET_YEAR(dt));
2164 }
2165 else {
2166 size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2167 assert(idx < self->num_transitions);
2168 return self->trans_ttinfos[idx];
2169 }
2170 }
2171
2172 static int
is_leap_year(int year)2173 is_leap_year(int year)
2174 {
2175 const unsigned int ayear = (unsigned int)year;
2176 return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2177 }
2178
2179 /* Calculates ordinal datetime from year, month and day. */
2180 static int
ymd_to_ord(int y,int m,int d)2181 ymd_to_ord(int y, int m, int d)
2182 {
2183 y -= 1;
2184 int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2185 int yearday = DAYS_BEFORE_MONTH[m];
2186 if (m > 2 && is_leap_year(y + 1)) {
2187 yearday += 1;
2188 }
2189
2190 return days_before_year + yearday + d;
2191 }
2192
2193 /* Calculate the number of seconds since 1970-01-01 in local time.
2194 *
2195 * This gets a datetime in the same "units" as self->trans_list_wall so that we
2196 * can easily determine which transitions a datetime falls between. See the
2197 * comment above ts_to_local for more information.
2198 * */
2199 static int
get_local_timestamp(PyObject * dt,int64_t * local_ts)2200 get_local_timestamp(PyObject *dt, int64_t *local_ts)
2201 {
2202 assert(local_ts != NULL);
2203
2204 int hour, minute, second;
2205 int ord;
2206 if (PyDateTime_CheckExact(dt)) {
2207 int y = PyDateTime_GET_YEAR(dt);
2208 int m = PyDateTime_GET_MONTH(dt);
2209 int d = PyDateTime_GET_DAY(dt);
2210 hour = PyDateTime_DATE_GET_HOUR(dt);
2211 minute = PyDateTime_DATE_GET_MINUTE(dt);
2212 second = PyDateTime_DATE_GET_SECOND(dt);
2213
2214 ord = ymd_to_ord(y, m, d);
2215 }
2216 else {
2217 PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2218 if (num == NULL) {
2219 return -1;
2220 }
2221
2222 ord = PyLong_AsLong(num);
2223 Py_DECREF(num);
2224 if (ord == -1 && PyErr_Occurred()) {
2225 return -1;
2226 }
2227
2228 num = PyObject_GetAttrString(dt, "hour");
2229 if (num == NULL) {
2230 return -1;
2231 }
2232 hour = PyLong_AsLong(num);
2233 Py_DECREF(num);
2234 if (hour == -1) {
2235 return -1;
2236 }
2237
2238 num = PyObject_GetAttrString(dt, "minute");
2239 if (num == NULL) {
2240 return -1;
2241 }
2242 minute = PyLong_AsLong(num);
2243 Py_DECREF(num);
2244 if (minute == -1) {
2245 return -1;
2246 }
2247
2248 num = PyObject_GetAttrString(dt, "second");
2249 if (num == NULL) {
2250 return -1;
2251 }
2252 second = PyLong_AsLong(num);
2253 Py_DECREF(num);
2254 if (second == -1) {
2255 return -1;
2256 }
2257 }
2258
2259 *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2260 (int64_t)(hour * 3600 + minute * 60 + second);
2261
2262 return 0;
2263 }
2264
2265 /////
2266 // Functions for cache handling
2267
2268 /* Constructor for StrongCacheNode */
2269 static StrongCacheNode *
strong_cache_node_new(PyObject * key,PyObject * zone)2270 strong_cache_node_new(PyObject *key, PyObject *zone)
2271 {
2272 StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2273 if (node == NULL) {
2274 return NULL;
2275 }
2276
2277 Py_INCREF(key);
2278 Py_INCREF(zone);
2279
2280 node->next = NULL;
2281 node->prev = NULL;
2282 node->key = key;
2283 node->zone = zone;
2284
2285 return node;
2286 }
2287
2288 /* Destructor for StrongCacheNode */
2289 void
strong_cache_node_free(StrongCacheNode * node)2290 strong_cache_node_free(StrongCacheNode *node)
2291 {
2292 Py_XDECREF(node->key);
2293 Py_XDECREF(node->zone);
2294
2295 PyMem_Free(node);
2296 }
2297
2298 /* Frees all nodes at or after a specified root in the strong cache.
2299 *
2300 * This can be used on the root node to free the entire cache or it can be used
2301 * to clear all nodes that have been expired (which, if everything is going
2302 * right, will actually only be 1 node at a time).
2303 */
2304 void
strong_cache_free(StrongCacheNode * root)2305 strong_cache_free(StrongCacheNode *root)
2306 {
2307 StrongCacheNode *node = root;
2308 StrongCacheNode *next_node;
2309 while (node != NULL) {
2310 next_node = node->next;
2311 strong_cache_node_free(node);
2312
2313 node = next_node;
2314 }
2315 }
2316
2317 /* Removes a node from the cache and update its neighbors.
2318 *
2319 * This is used both when ejecting a node from the cache and when moving it to
2320 * the front of the cache.
2321 */
2322 static void
remove_from_strong_cache(StrongCacheNode * node)2323 remove_from_strong_cache(StrongCacheNode *node)
2324 {
2325 if (ZONEINFO_STRONG_CACHE == node) {
2326 ZONEINFO_STRONG_CACHE = node->next;
2327 }
2328
2329 if (node->prev != NULL) {
2330 node->prev->next = node->next;
2331 }
2332
2333 if (node->next != NULL) {
2334 node->next->prev = node->prev;
2335 }
2336
2337 node->next = NULL;
2338 node->prev = NULL;
2339 }
2340
2341 /* Retrieves the node associated with a key, if it exists.
2342 *
2343 * This traverses the strong cache until it finds a matching key and returns a
2344 * pointer to the relevant node if found. Returns NULL if no node is found.
2345 *
2346 * root may be NULL, indicating an empty cache.
2347 */
2348 static StrongCacheNode *
find_in_strong_cache(const StrongCacheNode * const root,PyObject * const key)2349 find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2350 {
2351 const StrongCacheNode *node = root;
2352 while (node != NULL) {
2353 int rv = PyObject_RichCompareBool(key, node->key, Py_EQ);
2354 if (rv < 0) {
2355 return NULL;
2356 }
2357 if (rv) {
2358 return (StrongCacheNode *)node;
2359 }
2360
2361 node = node->next;
2362 }
2363
2364 return NULL;
2365 }
2366
2367 /* Ejects a given key from the class's strong cache, if applicable.
2368 *
2369 * This function is used to enable the per-key functionality in clear_cache.
2370 */
2371 static int
eject_from_strong_cache(const PyTypeObject * const type,PyObject * key)2372 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2373 {
2374 if (type != &PyZoneInfo_ZoneInfoType) {
2375 return 0;
2376 }
2377
2378 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2379 if (node != NULL) {
2380 remove_from_strong_cache(node);
2381
2382 strong_cache_node_free(node);
2383 }
2384 else if (PyErr_Occurred()) {
2385 return -1;
2386 }
2387 return 0;
2388 }
2389
2390 /* Moves a node to the front of the LRU cache.
2391 *
2392 * The strong cache is an LRU cache, so whenever a given node is accessed, if
2393 * it is not at the front of the cache, it needs to be moved there.
2394 */
2395 static void
move_strong_cache_node_to_front(StrongCacheNode ** root,StrongCacheNode * node)2396 move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2397 {
2398 StrongCacheNode *root_p = *root;
2399 if (root_p == node) {
2400 return;
2401 }
2402
2403 remove_from_strong_cache(node);
2404
2405 node->prev = NULL;
2406 node->next = root_p;
2407
2408 if (root_p != NULL) {
2409 root_p->prev = node;
2410 }
2411
2412 *root = node;
2413 }
2414
2415 /* Retrieves a ZoneInfo from the strong cache if it's present.
2416 *
2417 * This function finds the ZoneInfo by key and if found will move the node to
2418 * the front of the LRU cache and return a new reference to it. It returns NULL
2419 * if the key is not in the cache.
2420 *
2421 * The strong cache is currently only implemented for the base class, so this
2422 * always returns a cache miss for subclasses.
2423 */
2424 static PyObject *
zone_from_strong_cache(const PyTypeObject * const type,PyObject * const key)2425 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2426 {
2427 if (type != &PyZoneInfo_ZoneInfoType) {
2428 return NULL; // Strong cache currently only implemented for base class
2429 }
2430
2431 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2432
2433 if (node != NULL) {
2434 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2435 Py_INCREF(node->zone);
2436 return node->zone;
2437 }
2438
2439 return NULL; // Cache miss
2440 }
2441
2442 /* Inserts a new key into the strong LRU cache.
2443 *
2444 * This function is only to be used after a cache miss — it creates a new node
2445 * at the front of the cache and ejects any stale entries (keeping the size of
2446 * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2447 */
2448 static void
update_strong_cache(const PyTypeObject * const type,PyObject * key,PyObject * zone)2449 update_strong_cache(const PyTypeObject *const type, PyObject *key,
2450 PyObject *zone)
2451 {
2452 if (type != &PyZoneInfo_ZoneInfoType) {
2453 return;
2454 }
2455
2456 StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2457
2458 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2459
2460 StrongCacheNode *node = new_node->next;
2461 for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2462 if (node == NULL) {
2463 return;
2464 }
2465 node = node->next;
2466 }
2467
2468 // Everything beyond this point needs to be freed
2469 if (node != NULL) {
2470 if (node->prev != NULL) {
2471 node->prev->next = NULL;
2472 }
2473 strong_cache_free(node);
2474 }
2475 }
2476
2477 /* Clears all entries into a type's strong cache.
2478 *
2479 * Because the strong cache is not implemented for subclasses, this is a no-op
2480 * for everything except the base class.
2481 */
2482 void
clear_strong_cache(const PyTypeObject * const type)2483 clear_strong_cache(const PyTypeObject *const type)
2484 {
2485 if (type != &PyZoneInfo_ZoneInfoType) {
2486 return;
2487 }
2488
2489 strong_cache_free(ZONEINFO_STRONG_CACHE);
2490 ZONEINFO_STRONG_CACHE = NULL;
2491 }
2492
2493 static PyObject *
new_weak_cache(void)2494 new_weak_cache(void)
2495 {
2496 PyObject *weakref_module = PyImport_ImportModule("weakref");
2497 if (weakref_module == NULL) {
2498 return NULL;
2499 }
2500
2501 PyObject *weak_cache =
2502 PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2503 Py_DECREF(weakref_module);
2504 return weak_cache;
2505 }
2506
2507 static int
initialize_caches(void)2508 initialize_caches(void)
2509 {
2510 // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
2511 if (TIMEDELTA_CACHE == NULL) {
2512 TIMEDELTA_CACHE = PyDict_New();
2513 }
2514 else {
2515 Py_INCREF(TIMEDELTA_CACHE);
2516 }
2517
2518 if (TIMEDELTA_CACHE == NULL) {
2519 return -1;
2520 }
2521
2522 if (ZONEINFO_WEAK_CACHE == NULL) {
2523 ZONEINFO_WEAK_CACHE = new_weak_cache();
2524 }
2525 else {
2526 Py_INCREF(ZONEINFO_WEAK_CACHE);
2527 }
2528
2529 if (ZONEINFO_WEAK_CACHE == NULL) {
2530 return -1;
2531 }
2532
2533 return 0;
2534 }
2535
2536 static PyObject *
zoneinfo_init_subclass(PyTypeObject * cls,PyObject * args,PyObject ** kwargs)2537 zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2538 {
2539 PyObject *weak_cache = new_weak_cache();
2540 if (weak_cache == NULL) {
2541 return NULL;
2542 }
2543
2544 if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache",
2545 weak_cache) < 0) {
2546 Py_DECREF(weak_cache);
2547 return NULL;
2548 }
2549 Py_DECREF(weak_cache);
2550 Py_RETURN_NONE;
2551 }
2552
2553 /////
2554 // Specify the ZoneInfo type
2555 static PyMethodDef zoneinfo_methods[] = {
2556 {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2557 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2558 PyDoc_STR("Clear the ZoneInfo cache.")},
2559 {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2560 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2561 PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2562 {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2563 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2564 PyDoc_STR("Create a ZoneInfo file from a file object.")},
2565 {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2566 PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2567 "the given datetime.")},
2568 {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2569 PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2570 "in a zone at the given datetime.")},
2571 {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2572 PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2573 "zone that applies in a zone at a given datetime.")},
2574 {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2575 PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2576 "datetime in local time.")},
2577 {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2578 PyDoc_STR("Function for serialization with the pickle protocol.")},
2579 {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2580 PyDoc_STR("Private method used in unpickling.")},
2581 {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
2582 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2583 PyDoc_STR("Function to initialize subclasses.")},
2584 {NULL} /* Sentinel */
2585 };
2586
2587 static PyMemberDef zoneinfo_members[] = {
2588 {.name = "key",
2589 .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2590 .type = T_OBJECT_EX,
2591 .flags = READONLY,
2592 .doc = NULL},
2593 {NULL}, /* Sentinel */
2594 };
2595
2596 static PyTypeObject PyZoneInfo_ZoneInfoType = {
2597 PyVarObject_HEAD_INIT(NULL, 0) //
2598 .tp_name = "zoneinfo.ZoneInfo",
2599 .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2600 .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2601 .tp_repr = (reprfunc)zoneinfo_repr,
2602 .tp_str = (reprfunc)zoneinfo_str,
2603 .tp_getattro = PyObject_GenericGetAttr,
2604 .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2605 /* .tp_doc = zoneinfo_doc, */
2606 .tp_methods = zoneinfo_methods,
2607 .tp_members = zoneinfo_members,
2608 .tp_new = zoneinfo_new,
2609 .tp_dealloc = zoneinfo_dealloc,
2610 };
2611
2612 /////
2613 // Specify the _zoneinfo module
2614 static PyMethodDef module_methods[] = {{NULL, NULL}};
2615 static void
module_free()2616 module_free()
2617 {
2618 Py_XDECREF(_tzpath_find_tzfile);
2619 _tzpath_find_tzfile = NULL;
2620
2621 Py_XDECREF(_common_mod);
2622 _common_mod = NULL;
2623
2624 Py_XDECREF(io_open);
2625 io_open = NULL;
2626
2627 xdecref_ttinfo(&NO_TTINFO);
2628
2629 if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2630 Py_DECREF(TIMEDELTA_CACHE);
2631 } else {
2632 Py_CLEAR(TIMEDELTA_CACHE);
2633 }
2634
2635 if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2636 Py_DECREF(ZONEINFO_WEAK_CACHE);
2637 } else {
2638 Py_CLEAR(ZONEINFO_WEAK_CACHE);
2639 }
2640
2641 clear_strong_cache(&PyZoneInfo_ZoneInfoType);
2642 }
2643
2644 static int
zoneinfomodule_exec(PyObject * m)2645 zoneinfomodule_exec(PyObject *m)
2646 {
2647 PyDateTime_IMPORT;
2648 if (PyDateTimeAPI == NULL) {
2649 goto error;
2650 }
2651 PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2652 if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2653 goto error;
2654 }
2655
2656 Py_INCREF(&PyZoneInfo_ZoneInfoType);
2657 PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType);
2658
2659 /* Populate imports */
2660 PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2661 if (_tzpath_module == NULL) {
2662 goto error;
2663 }
2664
2665 _tzpath_find_tzfile =
2666 PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2667 Py_DECREF(_tzpath_module);
2668 if (_tzpath_find_tzfile == NULL) {
2669 goto error;
2670 }
2671
2672 PyObject *io_module = PyImport_ImportModule("io");
2673 if (io_module == NULL) {
2674 goto error;
2675 }
2676
2677 io_open = PyObject_GetAttrString(io_module, "open");
2678 Py_DECREF(io_module);
2679 if (io_open == NULL) {
2680 goto error;
2681 }
2682
2683 _common_mod = PyImport_ImportModule("zoneinfo._common");
2684 if (_common_mod == NULL) {
2685 goto error;
2686 }
2687
2688 if (NO_TTINFO.utcoff == NULL) {
2689 NO_TTINFO.utcoff = Py_None;
2690 NO_TTINFO.dstoff = Py_None;
2691 NO_TTINFO.tzname = Py_None;
2692
2693 for (size_t i = 0; i < 3; ++i) {
2694 Py_INCREF(Py_None);
2695 }
2696 }
2697
2698 if (initialize_caches()) {
2699 goto error;
2700 }
2701
2702 return 0;
2703
2704 error:
2705 return -1;
2706 }
2707
2708 static PyModuleDef_Slot zoneinfomodule_slots[] = {
2709 {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2710
2711 static struct PyModuleDef zoneinfomodule = {
2712 PyModuleDef_HEAD_INIT,
2713 .m_name = "_zoneinfo",
2714 .m_doc = "C implementation of the zoneinfo module",
2715 .m_size = 0,
2716 .m_methods = module_methods,
2717 .m_slots = zoneinfomodule_slots,
2718 .m_free = (freefunc)module_free};
2719
2720 PyMODINIT_FUNC
PyInit__zoneinfo(void)2721 PyInit__zoneinfo(void)
2722 {
2723 return PyModuleDef_Init(&zoneinfomodule);
2724 }
2725