/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_BACKING_DEV_DEFS_H
#define __LINUX_BACKING_DEV_DEFS_H

#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/percpu_counter.h>
#include <linux/percpu-refcount.h>
#include <linux/flex_proportions.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
#include <linux/refcount.h>
#include <linux/android_kabi.h>

struct page;
struct device;
struct dentry;

/*
 * Bits in bdi_writeback.state
 */
enum wb_state {
	WB_registered,		/* bdi_register() was done */
	WB_writeback_running,	/* Writeback is in progress */
	WB_has_dirty_io,	/* Dirty inodes on ->b_{dirty|io|more_io} */
	WB_start_all,		/* nr_pages == 0 (all) work pending */
};
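
/*
 * Illustrative sketch (not part of this header): wb->state is always
 * manipulated with atomic bitops, e.g.
 *
 *	if (test_bit(WB_has_dirty_io, &wb->state))
 *		... the wb has inodes queued on its b_* lists ...
 */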

enum wb_congested_state {
	WB_async_congested,	/* The async (write) queue is getting full */
	WB_sync_congested,	/* The sync queue is getting full */
};

enum wb_stat_item {
	WB_RECLAIMABLE,
	WB_WRITEBACK,
	WB_DIRTIED,
	WB_WRITTEN,
	NR_WB_STAT_ITEMS
};
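
/*
 * Illustrative sketch: these per-wb counters are normally read and updated
 * through helpers such as wb_stat(), inc_wb_stat() and dec_wb_stat()
 * declared in <linux/backing-dev.h>, e.g.
 *
 *	inc_wb_stat(wb, WB_WRITEBACK);
 *	nr_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
 */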

#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
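/*
 * Worked example (illustrative): with nr_cpu_ids == 8, ilog2(8) == 3, so
 * WB_STAT_BATCH == 8 * (1 + 3) == 32 -- roughly, each CPU may accumulate up
 * to 32 counts locally before folding them into the shared percpu_counter
 * sum, and the batch grows logarithmically with the number of CPUs.
 */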

/*
 * why some writeback work was initiated
 */
enum wb_reason {
	WB_REASON_BACKGROUND,
	WB_REASON_VMSCAN,
	WB_REASON_SYNC,
	WB_REASON_PERIODIC,
	WB_REASON_LAPTOP_TIMER,
	WB_REASON_FS_FREE_SPACE,
	/*
	 * There is no bdi forker thread any more and the work is done by an
	 * emergency worker.  However, this reason is visible to userland
	 * through tracepoints and we keep exposing exactly the same
	 * information, so the name no longer matches what actually happens.
	 */
	WB_REASON_FORKER_THREAD,
	WB_REASON_FOREIGN_FLUSH,

	WB_REASON_MAX,
};

struct wb_completion {
	atomic_t		cnt;
	wait_queue_head_t	*waitq;
};

#define __WB_COMPLETION_INIT(_waitq)	\
	(struct wb_completion){ .cnt = ATOMIC_INIT(1), .waitq = (_waitq) }

/*
 * If one wants to wait for one or more wb_writeback_works, each work's
 * ->done should be set to a wb_completion defined using the following
 * macro.  Once all work items are issued with wb_queue_work(), the caller
 * can wait for the completion of all using wb_wait_for_completion().  Work
 * items which are waited upon aren't freed automatically on completion.
 */
#define WB_COMPLETION_INIT(bdi)		__WB_COMPLETION_INIT(&(bdi)->wb_waitq)

#define DEFINE_WB_COMPLETION(cmpl, bdi)	\
	struct wb_completion cmpl = WB_COMPLETION_INIT(bdi)

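/*
 * Illustrative sketch of the pattern described above.  Note that
 * struct wb_writeback_work and wb_queue_work() live in fs/fs-writeback.c,
 * so this is only usable from there:
 *
 *	DEFINE_WB_COMPLETION(done, bdi);
 *	struct wb_writeback_work work = {
 *		.sync_mode	= WB_SYNC_NONE,
 *		.nr_pages	= nr_pages,
 *		.done		= &done,
 *	};
 *
 *	wb_queue_work(wb, &work);
 *	wb_wait_for_completion(&done);
 *
 * Because waited-upon work items aren't freed on completion, @work may
 * safely live on the caller's stack.
 */
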
/*
 * Each wb (bdi_writeback) can perform writeback operations, is measured
 * and throttled, independently.  Without cgroup writeback, each bdi
 * (backing_dev_info) is served by its embedded bdi->wb.
 *
 * On the default hierarchy, blkcg implicitly enables memcg.  This allows
 * using memcg's page ownership for attributing writeback IOs, and every
 * memcg - blkcg combination can be served by its own wb by assigning a
 * dedicated wb to each memcg, which enables isolation across different
 * cgroups and propagation of IO back pressure down from the IO layer up to
 * the tasks which are generating the dirty pages to be written back.
 *
 * A cgroup wb is indexed on its bdi by the ID of the associated memcg,
 * refcounted with the number of inodes attached to it, and pins the memcg
 * and the corresponding blkcg.  As the corresponding blkcg for a memcg may
 * change as blkcg is disabled and enabled higher up in the hierarchy, a wb
 * is tested for blkcg after lookup and removed from the index on mismatch
 * so that a new wb for the combination can be created.
 */
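
/*
 * Illustrative pointer: the lookup/creation side of this scheme is
 * implemented in mm/backing-dev.c -- see wb_get_lookup() and
 * wb_get_create(), which map a (bdi, memcg_css) pair to its cgroup wb via
 * bdi->cgwb_tree.
 */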
struct bdi_writeback {
	struct backing_dev_info *bdi;	/* our parent bdi */

	unsigned long state;		/* Always use atomic bitops on this */
	unsigned long last_old_flush;	/* last old data flush */

	struct list_head b_dirty;	/* dirty inodes */
	struct list_head b_io;		/* parked for writeback */
	struct list_head b_more_io;	/* parked for more writeback */
	struct list_head b_dirty_time;	/* time stamps are dirty */
	spinlock_t list_lock;		/* protects the b_* lists */

	atomic_t writeback_inodes;	/* number of inodes under writeback */
	struct percpu_counter stat[NR_WB_STAT_ITEMS];

	unsigned long congested;	/* WB_[a]sync_congested flags */

	unsigned long bw_time_stamp;	/* last time write bw is updated */
	unsigned long dirtied_stamp;
	unsigned long written_stamp;	/* pages written at bw_time_stamp */
	unsigned long write_bandwidth;	/* the estimated write bandwidth */
	unsigned long avg_write_bandwidth; /* further smoothed write bw, > 0 */

	/*
	 * The base dirty throttle rate, recalculated every 200ms.
	 * All dirtying tasks on this bdi have their dirty rate curbed to
	 * stay below it.  @dirty_ratelimit tracks the estimated
	 * @balanced_dirty_ratelimit in small steps and is much smoother
	 * and more stable than the latter.
	 */
	unsigned long dirty_ratelimit;
	unsigned long balanced_dirty_ratelimit;

	struct fprop_local_percpu completions;
	int dirty_exceeded;
	enum wb_reason start_all_reason;

	spinlock_t work_lock;		/* protects work_list & dwork scheduling */
	struct list_head work_list;
	struct delayed_work dwork;	/* work item used for writeback */
	struct delayed_work bw_dwork;	/* work item used for bandwidth estimate */

	unsigned long dirty_sleep;	/* last wait */

	struct list_head bdi_node;	/* anchored at bdi->wb_list */

#ifdef CONFIG_CGROUP_WRITEBACK
	struct percpu_ref refcnt;	/* used only for !root wb's */
	struct fprop_local_percpu memcg_completions;
	struct cgroup_subsys_state *memcg_css; /* the associated memcg */
	struct cgroup_subsys_state *blkcg_css; /* and blkcg */
	struct list_head memcg_node;	/* anchored at memcg->cgwb_list */
	struct list_head blkcg_node;	/* anchored at blkcg->cgwb_list */
	struct list_head b_attached;	/* attached inodes, protected by list_lock */
	struct list_head offline_node;	/* anchored at offline_cgwbs */

	union {
		struct work_struct release_work;
		struct rcu_head rcu;
	};
#endif

	ANDROID_KABI_RESERVE(1);
	ANDROID_KABI_RESERVE(2);
};

struct backing_dev_info {
	u64 id;
	struct rb_node rb_node;		/* keyed by ->id */
	struct list_head bdi_list;
	unsigned long ra_pages;		/* max readahead in PAGE_SIZE units */
	unsigned long io_pages;		/* max allowed IO size */

	struct kref refcnt;		/* Reference counter for the structure */
	unsigned int capabilities;	/* Device capabilities */
	unsigned int min_ratio;
	unsigned int max_ratio, max_prop_frac;

	/*
	 * Sum of avg_write_bw of wbs with dirty inodes.  > 0 if there are
	 * any dirty wbs, which bdi_has_dirty_io() depends on.
	 */
	atomic_long_t tot_write_bandwidth;

	struct bdi_writeback wb;	/* the root writeback info for this bdi */
	struct list_head wb_list;	/* list of all wbs */
#ifdef CONFIG_CGROUP_WRITEBACK
	struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
	struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */
	struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
#endif
	wait_queue_head_t wb_waitq;

	struct device *dev;
	char dev_name[64];
	struct device *owner;

	struct timer_list laptop_mode_wb_timer;

#ifdef CONFIG_DEBUG_FS
	struct dentry *debug_dir;
#endif

	ANDROID_KABI_RESERVE(1);
	ANDROID_KABI_RESERVE(2);
};

enum {
	BLK_RW_ASYNC	= 0,
	BLK_RW_SYNC	= 1,
};

void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
void set_bdi_congested(struct backing_dev_info *bdi, int sync);
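
/*
 * Illustrative sketch: callers pass BLK_RW_SYNC or BLK_RW_ASYNC as @sync,
 * e.g.
 *
 *	set_bdi_congested(bdi, BLK_RW_ASYNC);
 *	...
 *	clear_bdi_congested(bdi, BLK_RW_ASYNC);
 */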

struct wb_lock_cookie {
	bool locked;
	unsigned long flags;
};
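
/*
 * Illustrative sketch: a wb_lock_cookie is used with the
 * unlocked_inode_to_wb_begin()/unlocked_inode_to_wb_end() helpers declared
 * in <linux/backing-dev.h>:
 *
 *	struct wb_lock_cookie cookie = {};
 *	struct bdi_writeback *wb;
 *
 *	wb = unlocked_inode_to_wb_begin(inode, &cookie);
 *	... access @wb; the inode-to-wb association stays stable here ...
 *	unlocked_inode_to_wb_end(inode, &cookie);
 */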

#ifdef CONFIG_CGROUP_WRITEBACK

/**
 * wb_tryget - try to increment a wb's refcount
 * @wb: bdi_writeback to get
 */
static inline bool wb_tryget(struct bdi_writeback *wb)
{
	if (wb != &wb->bdi->wb)
		return percpu_ref_tryget(&wb->refcnt);
	return true;
}

/**
 * wb_get - increment a wb's refcount
 * @wb: bdi_writeback to get
 */
static inline void wb_get(struct bdi_writeback *wb)
{
	if (wb != &wb->bdi->wb)
		percpu_ref_get(&wb->refcnt);
}

/**
 * wb_put_many - decrement a wb's refcount
 * @wb: bdi_writeback to put
 * @nr: number of references to put
 */
static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
{
	if (WARN_ON_ONCE(!wb->bdi)) {
		/*
		 * A driver bug might cause a file to be removed before bdi was
		 * initialized.
		 */
		return;
	}

	if (wb != &wb->bdi->wb)
		percpu_ref_put_many(&wb->refcnt, nr);
}

/**
 * wb_put - decrement a wb's refcount
 * @wb: bdi_writeback to put
 */
static inline void wb_put(struct bdi_writeback *wb)
{
	wb_put_many(wb, 1);
}
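
/*
 * Illustrative sketch: a caller that must keep @wb alive across a blocking
 * operation pairs the reference operations:
 *
 *	if (wb_tryget(wb)) {
 *		... use wb, possibly sleeping ...
 *		wb_put(wb);
 *	}
 */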

/**
 * wb_dying - is a wb dying?
 * @wb: bdi_writeback of interest
 *
 * Returns whether @wb is unlinked and being drained.
 */
static inline bool wb_dying(struct bdi_writeback *wb)
{
	return percpu_ref_is_dying(&wb->refcnt);
}

#else /* CONFIG_CGROUP_WRITEBACK */

static inline bool wb_tryget(struct bdi_writeback *wb)
{
	return true;
}

static inline void wb_get(struct bdi_writeback *wb)
{
}

static inline void wb_put(struct bdi_writeback *wb)
{
}

static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
{
}

static inline bool wb_dying(struct bdi_writeback *wb)
{
	return false;
}

#endif /* CONFIG_CGROUP_WRITEBACK */

#endif /* __LINUX_BACKING_DEV_DEFS_H */