• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// The QSBR APIs (quiescent state-based reclamation) provide a mechanism for
// the free-threaded build to safely reclaim memory when there may be
// concurrent accesses.
//
// Many operations in the free-threaded build are protected by locks. However,
// in some cases, we want to allow reads to happen concurrently with updates.
// In this case, we need to delay freeing ("reclaiming") any memory that may be
// concurrently accessed by a reader. The QSBR APIs provide a way to do this.
9 #ifndef Py_INTERNAL_QSBR_H
10 #define Py_INTERNAL_QSBR_H
11 
12 #include <stdbool.h>
13 #include <stdint.h>
14 #include "pycore_lock.h"        // PyMutex
15 
16 #ifdef __cplusplus
17 extern "C" {
18 #endif
19 
20 #ifndef Py_BUILD_CORE
21 #  error "this header requires Py_BUILD_CORE define"
22 #endif
23 
// The shared write sequence is always odd and incremented by two. Detached
// threads are indicated by a read sequence of zero. This avoids collisions
// between the offline state and any valid sequence number even if the
// sequence numbers wrap around.
#define QSBR_OFFLINE 0
#define QSBR_INITIAL 1
#define QSBR_INCR    2

// Wrap-around safe comparisons. The cast to a signed difference makes the
// ordering correct even across a wrap of the unsigned counters. This is a
// holdover from the FreeBSD implementation, which uses 32-bit sequence
// numbers. We currently use 64-bit sequence numbers, so wrap-around is
// unlikely.
#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)

38 struct _qsbr_shared;
39 struct _PyThreadStateImpl;  // forward declare to avoid circular dependency
40 
41 // Per-thread state
42 struct _qsbr_thread_state {
43     // Last observed write sequence (or 0 if detached)
44     uint64_t seq;
45 
46     // Shared (per-interpreter) QSBR state
47     struct _qsbr_shared *shared;
48 
49     // Thread state (or NULL)
50     PyThreadState *tstate;
51 
52     // Used to defer advancing write sequence a fixed number of times
53     int deferrals;
54 
55     // Is this thread state allocated?
56     bool allocated;
57     struct _qsbr_thread_state *freelist_next;
58 };
59 
60 // Padding to avoid false sharing
61 struct _qsbr_pad {
62     struct _qsbr_thread_state qsbr;
63     char __padding[64 - sizeof(struct _qsbr_thread_state)];
64 };
65 
66 // Per-interpreter state
67 struct _qsbr_shared {
68     // Write sequence: always odd, incremented by two
69     uint64_t wr_seq;
70 
71     // Minimum observed read sequence of all QSBR thread states
72     uint64_t rd_seq;
73 
74     // Array of QSBR thread states.
75     struct _qsbr_pad *array;
76     Py_ssize_t size;
77 
78     // Freelist of unused _qsbr_thread_states (protected by mutex)
79     PyMutex mutex;
80     struct _qsbr_thread_state *freelist;
81 };
82 
83 static inline uint64_t
_Py_qsbr_shared_current(struct _qsbr_shared * shared)84 _Py_qsbr_shared_current(struct _qsbr_shared *shared)
85 {
86     return _Py_atomic_load_uint64_acquire(&shared->wr_seq);
87 }
88 
89 // Reports a quiescent state: the caller no longer holds any pointer to shared
90 // data not protected by locks or reference counts.
91 static inline void
_Py_qsbr_quiescent_state(struct _qsbr_thread_state * qsbr)92 _Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
93 {
94     uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
95     _Py_atomic_store_uint64_release(&qsbr->seq, seq);
96 }
97 
98 // Have the read sequences advanced to the given goal? Like `_Py_qsbr_poll()`,
99 // but does not perform a scan of threads.
100 static inline bool
_Py_qbsr_goal_reached(struct _qsbr_thread_state * qsbr,uint64_t goal)101 _Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal)
102 {
103     uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
104     return QSBR_LEQ(goal, rd_seq);
105 }
106 
107 // Advance the write sequence and return the new goal. This should be called
108 // after data is removed. The returned goal is used with `_Py_qsbr_poll()` to
109 // determine when it is safe to reclaim (free) the memory.
110 extern uint64_t
111 _Py_qsbr_advance(struct _qsbr_shared *shared);
112 
113 // Batches requests to advance the write sequence. This advances the write
114 // sequence every N calls, which reduces overhead but increases time to
115 // reclamation. Returns the new goal.
116 extern uint64_t
117 _Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr);
118 
119 // Have the read sequences advanced to the given goal? If this returns true,
120 // it safe to reclaim any memory tagged with the goal (or earlier goal).
121 extern bool
122 _Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal);
123 
124 // Called when thread attaches to interpreter
125 extern void
126 _Py_qsbr_attach(struct _qsbr_thread_state *qsbr);
127 
128 // Called when thread detaches from interpreter
129 extern void
130 _Py_qsbr_detach(struct _qsbr_thread_state *qsbr);
131 
132 // Reserves (allocates) a QSBR state and returns its index.
133 extern Py_ssize_t
134 _Py_qsbr_reserve(PyInterpreterState *interp);
135 
136 // Associates a PyThreadState with the QSBR state at the given index
137 extern void
138 _Py_qsbr_register(struct _PyThreadStateImpl *tstate,
139                   PyInterpreterState *interp, Py_ssize_t index);
140 
141 // Disassociates a PyThreadState from the QSBR state and frees the QSBR state.
142 extern void
143 _Py_qsbr_unregister(PyThreadState *tstate);
144 
145 extern void
146 _Py_qsbr_fini(PyInterpreterState *interp);
147 
148 extern void
149 _Py_qsbr_after_fork(struct _PyThreadStateImpl *tstate);
150 
151 #ifdef __cplusplus
152 }
153 #endif
154 #endif   /* !Py_INTERNAL_QSBR_H */
155