Lines matching: charge, current, limit, mapping (mm/page-writeback.c)
1 // SPDX-License-Identifier: GPL-2.0-only
3 * mm/page-writeback.c
26 #include <linux/backing-dev.h>
54 #define DIRTY_POLL_THRESH (128 >> (PAGE_SHIFT - 10))
100 * The interval between `kupdate'-style writebacks
119 /* End of sysctl-exported parameters */
137 unsigned long wb_dirty; /* per-wb counterparts */
147 * reflect changes in current writeout rate.
155 .wb_completions = &(__wb)->completions
161 .wb_completions = &(__wb)->memcg_completions, \
166 return dtc->dom; in mdtc_valid()
171 return dtc->dom; in dtc_dom()
176 return mdtc->gdtc; in mdtc_gdtc()
181 return &wb->memcg_completions; in wb_memcg_completions()
187 unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth); in wb_min_max_ratio()
188 unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth); in wb_min_max_ratio()
189 unsigned long long min = wb->bdi->min_ratio; in wb_min_max_ratio()
190 unsigned long long max = wb->bdi->max_ratio; in wb_min_max_ratio()
214 .wb_completions = &(__wb)->completions
241 *minp = wb->bdi->min_ratio; in wb_min_max_ratio()
242 *maxp = wb->bdi->max_ratio; in wb_min_max_ratio()
255 * user-configurable dirty ratio is the effective number of pages that
259 * Because the user is allowed to specify the dirty limit globally as
260 * absolute number of bytes, calculating the per-zone dirty limit can
261 * require translating the configured limit into a percentage of
266 * node_dirtyable_memory - number of dirtyable pages in a node
270 * page cache. This is the base value for the per-node dirty limits.
278 struct zone *zone = pgdat->node_zones + z; in node_dirtyable_memory()
291 nr_pages -= min(nr_pages, pgdat->totalreserve_pages); in node_dirtyable_memory()
314 z = &NODE_DATA(node)->node_zones[i]; in highmem_dirtyable_memory()
320 nr_pages -= min(nr_pages, high_wmark_pages(z)); in highmem_dirtyable_memory()
340 * global_dirtyable_memory - number of globally dirtyable pages
355 x -= min(x, totalreserve_pages); in global_dirtyable_memory()
361 x -= highmem_dirtyable_memory(x); in global_dirtyable_memory()
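
The global_dirtyable_memory() fragments above add up free and file-backed pages, then subtract the kernel reserves and (when highmem may not hold dirty data) the highmem share. A minimal userspace sketch of that arithmetic, with made-up page counts standing in for the real zone and node counters:

#include <stdio.h>

int main(void)
{
	/* assumed counters; the kernel reads these from the zone/node stats */
	unsigned long nr_free = 200000;      /* free pages                     */
	unsigned long nr_file = 300000;      /* page-cache (file-backed) pages */
	unsigned long totalreserve = 15000;  /* totalreserve_pages             */
	unsigned long highmem = 0;           /* highmem share, 0 on 64-bit     */

	unsigned long x = nr_free + nr_file;

	x -= (x < totalreserve) ? x : totalreserve; /* x -= min(x, reserve)        */
	x -= (x < highmem) ? x : highmem;           /* drop non-dirtyable highmem  */

	printf("dirtyable pages: %lu\n", x);        /* base for the dirty ratios   */
	return 0;
}
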
367 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
370 * Calculate @dtc->thresh and ->bg_thresh considering
372 * must ensure that @dtc->avail is set before calling this function. The
373 * dirty limits will be lifted by 1/4 for real-time tasks.
377 const unsigned long available_memory = dtc->avail; in domain_dirty_limits()
381 /* convert ratios to per-PAGE_SIZE for higher precision */ in domain_dirty_limits()
390 unsigned long global_avail = gdtc->avail; in domain_dirty_limits()
396 * per-PAGE_SIZE, they can be obtained by dividing bytes by in domain_dirty_limits()
418 tsk = current; in domain_dirty_limits()
425 * 32-bits. This gives 16TB dirty limits max which is hopefully enough. in domain_dirty_limits()
429 /* This makes sure bg_thresh is within 32-bits as well */ in domain_dirty_limits()
432 dtc->thresh = thresh; in domain_dirty_limits()
433 dtc->bg_thresh = bg_thresh; in domain_dirty_limits()
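
domain_dirty_limits() above turns the sysctl settings into page counts: vm.dirty_bytes and vm.dirty_background_bytes take precedence over the ratios, the background threshold is kept below the foreground one, and real-time tasks get a 25% higher limit. A simplified userspace sketch (the kernel works in per-PAGE_SIZE fixed point rather than plain percentages, and the 32-bit clamping is omitted; all inputs are assumptions):

#include <stdio.h>
#include <stdbool.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long available = 480000;   /* assumed dirtyable pages           */
	unsigned long dirty_bytes = 0;      /* vm.dirty_bytes (0 = use the ratio) */
	unsigned long dirty_ratio = 20;     /* vm.dirty_ratio, percent           */
	unsigned long bg_bytes = 0;         /* vm.dirty_background_bytes         */
	unsigned long bg_ratio = 10;        /* vm.dirty_background_ratio         */
	bool rt_task = false;               /* assumed scheduling class          */

	unsigned long thresh = dirty_bytes ? dirty_bytes / PAGE_SIZE
					   : available * dirty_ratio / 100;
	unsigned long bg_thresh = bg_bytes ? bg_bytes / PAGE_SIZE
					   : available * bg_ratio / 100;

	if (bg_thresh >= thresh)            /* background must stay below thresh */
		bg_thresh = thresh / 2;
	if (rt_task) {                      /* "lifted by 1/4 for real-time tasks" */
		thresh += thresh / 4;
		bg_thresh += bg_thresh / 4;
	}

	printf("thresh=%lu bg_thresh=%lu pages\n", thresh, bg_thresh);
	return 0;
}
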
441 * global_dirty_limits - background-writeback and dirty-throttling thresholds
460 * node_dirty_limit - maximum number of dirty pages allowed in a node
469 struct task_struct *tsk = current; in node_dirty_limit()
483 * 32-bits. This gives 16TB dirty limits max which is hopefully enough. in node_dirty_limit()
489 * node_dirty_ok - tells whether a node is within its dirty limits
493 * dirty limit, %false if the limit is exceeded.
497 unsigned long limit = node_dirty_limit(pgdat); in node_dirty_ok() local
503 return nr_pages <= limit; in node_dirty_ok()
529 return -ERANGE; in dirty_background_bytes_handler()
560 return -ERANGE; in dirty_bytes_handler()
582 __fprop_add_percpu_max(&dom->completions, completions, in wb_domain_writeout_add()
585 if (unlikely(!dom->period_time)) { in wb_domain_writeout_add()
592 dom->period_time = wp_next_time(jiffies); in wb_domain_writeout_add()
593 mod_timer(&dom->period_timer, dom->period_time); in wb_domain_writeout_add()
606 wb_domain_writeout_add(&global_wb_domain, &wb->completions, in __wb_writeout_add()
607 wb->bdi->max_prop_frac, nr); in __wb_writeout_add()
612 wb->bdi->max_prop_frac, nr); in __wb_writeout_add()
632 int miss_periods = (jiffies - dom->period_time) / in writeout_period()
635 if (fprop_new_period(&dom->completions, miss_periods + 1)) { in writeout_period()
636 dom->period_time = wp_next_time(dom->period_time + in writeout_period()
638 mod_timer(&dom->period_timer, dom->period_time); in writeout_period()
644 dom->period_time = 0; in writeout_period()
652 spin_lock_init(&dom->lock); in wb_domain_init()
654 timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE); in wb_domain_init()
656 dom->dirty_limit_tstamp = jiffies; in wb_domain_init()
658 return fprop_global_init(&dom->completions, gfp); in wb_domain_init()
664 del_timer_sync(&dom->period_timer); in wb_domain_exit()
665 fprop_global_destroy(&dom->completions); in wb_domain_exit()
681 return -EINVAL; in bdi_check_pages_limit()
716 return -EINVAL; in __bdi_set_min_ratio()
720 if (min_ratio > bdi->max_ratio) { in __bdi_set_min_ratio()
721 ret = -EINVAL; in __bdi_set_min_ratio()
723 if (min_ratio < bdi->min_ratio) { in __bdi_set_min_ratio()
724 delta = bdi->min_ratio - min_ratio; in __bdi_set_min_ratio()
725 bdi_min_ratio -= delta; in __bdi_set_min_ratio()
726 bdi->min_ratio = min_ratio; in __bdi_set_min_ratio()
728 delta = min_ratio - bdi->min_ratio; in __bdi_set_min_ratio()
731 bdi->min_ratio = min_ratio; in __bdi_set_min_ratio()
733 ret = -EINVAL; in __bdi_set_min_ratio()
747 return -EINVAL; in __bdi_set_max_ratio()
750 if (bdi->min_ratio > max_ratio) { in __bdi_set_max_ratio()
751 ret = -EINVAL; in __bdi_set_max_ratio()
753 bdi->max_ratio = max_ratio; in __bdi_set_max_ratio()
754 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100; in __bdi_set_max_ratio()
784 return bdi_get_bytes(bdi->min_ratio); in bdi_get_min_bytes()
803 return bdi_get_bytes(bdi->max_ratio); in bdi_get_max_bytes()
823 return -EINVAL; in bdi_set_strict_limit()
827 bdi->capabilities |= BDI_CAP_STRICTLIMIT; in bdi_set_strict_limit()
829 bdi->capabilities &= ~BDI_CAP_STRICTLIMIT; in bdi_set_strict_limit()
844 return max(thresh, dom->dirty_limit); in hard_dirty_limit()
849 * system-wide clean memory excluding the amount being used in the domain.
855 unsigned long clean = filepages - min(filepages, mdtc->dirty); in mdtc_calc_avail()
856 unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty); in mdtc_calc_avail()
857 unsigned long other_clean = global_clean - min(global_clean, clean); in mdtc_calc_avail()
859 mdtc->avail = filepages + min(headroom, other_clean); in mdtc_calc_avail()
863 * __wb_calc_thresh - @wb's share of dirty throttling threshold
866 * Note that balance_dirty_pages() will only seriously take it as a hard limit
874 * - starving fast devices
875 * - piling up dirty pages (that will take long time to sync) on slow devices
877 * The wb's share of dirty limit will be adapting to its throughput and
878 * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
880 * Return: @wb's dirty limit in pages. The term "dirty" in the context of
886 unsigned long thresh = dtc->thresh; in __wb_calc_thresh()
894 fprop_fraction_percpu(&dom->completions, dtc->wb_completions, in __wb_calc_thresh()
897 wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE); in __wb_calc_thresh()
901 wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio); in __wb_calc_thresh()
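
__wb_calc_thresh() above splits the global threshold among writeback contexts in proportion to their recent writeout completions, then applies the per-bdi min/max ratios. A rough userspace sketch with made-up numbers (the kernel uses fprop fixed-point fractions and a BDI_RATIO_SCALE factor that are glossed over here):

#include <stdio.h>

int main(void)
{
	unsigned long thresh = 100000;   /* global dirty threshold, pages        */
	unsigned long numerator = 3;     /* assumed completion share: 3/16       */
	unsigned long denominator = 16;
	unsigned long min_ratio = 0;     /* bdi->min_ratio, percent              */
	unsigned long max_ratio = 100;   /* bdi->max_ratio, percent              */

	unsigned long wb_thresh = thresh * numerator / denominator;

	wb_thresh += (thresh * min_ratio) / 100;      /* guaranteed minimum share */
	if (wb_thresh > thresh * max_ratio / 100)     /* hard cap                 */
		wb_thresh = thresh * max_ratio / 100;

	printf("wb_thresh = %lu pages\n", wb_thresh);
	return 0;
}
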
918 * setpoint - dirty 3
919 * f(dirty) := 1.0 + (----------------)
920 * limit - setpoint
926 * (3) f(limit) = 0 => the hard limit
933 unsigned long limit) in pos_ratio_polynom() argument
938 x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, in pos_ratio_polynom()
939 (limit - setpoint) | 1); in pos_ratio_polynom()
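
pos_ratio_polynom() above evaluates the cubic control curve f(dirty) = 1 + ((setpoint - dirty) / (limit - setpoint))^3 in 10-bit fixed point. A standalone sketch that mirrors the shift-based arithmetic (RATELIMIT_CALC_SHIFT = 10 is an assumption taken from the usual definition; the page counts are made up):

#include <stdio.h>
#include <stdint.h>

#define SHIFT 10   /* assumed RATELIMIT_CALC_SHIFT */

static long long pos_ratio(unsigned long setpoint, unsigned long dirty,
			   unsigned long limit)
{
	long long x = (((int64_t)setpoint - (int64_t)dirty) << SHIFT) /
		      (long long)((limit - setpoint) | 1);
	long long p = x;

	p = p * x >> SHIFT;           /* x^2 */
	p = p * x >> SHIFT;           /* x^3 */
	p += 1LL << SHIFT;            /* 1.0 + x^3 */

	if (p < 0)
		p = 0;
	if (p > (2LL << SHIFT))
		p = 2LL << SHIFT;
	return p;                     /* 1024 means 1.0 */
}

int main(void)
{
	/* 1.0 at the setpoint, below 1.0 past it, ~0 at the hard limit */
	printf("%lld %lld %lld\n",
	       pos_ratio(6000, 6000, 8000),
	       pos_ratio(6000, 7000, 8000),
	       pos_ratio(6000, 8000, 8000));
	return 0;
}
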
986 * 0 +------------.------------------.----------------------*------------->
987 * freerun^ setpoint^ limit^ dirty pages
1014 * 0 +----------------------.-------------------------------.------------->
1019 * - start writing to a slow SD card and a fast disk at the same time. The SD
1021 * - the wb dirty thresh drops quickly due to change of JBOD workload
1025 struct bdi_writeback *wb = dtc->wb; in wb_position_ratio()
1026 unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth); in wb_position_ratio()
1027 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh); in wb_position_ratio()
1028 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh); in wb_position_ratio() local
1029 unsigned long wb_thresh = dtc->wb_thresh; in wb_position_ratio()
1034 long long pos_ratio; /* for scaling up/down the rate limit */ in wb_position_ratio()
1037 dtc->pos_ratio = 0; in wb_position_ratio()
1039 if (unlikely(dtc->dirty >= limit)) in wb_position_ratio()
1047 setpoint = (freerun + limit) / 2; in wb_position_ratio()
1048 pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit); in wb_position_ratio()
1055 * This is especially important for fuse which sets bdi->max_ratio to in wb_position_ratio()
1062 * total amount of RAM is 16GB, bdi->max_ratio is equal to 1%, global in wb_position_ratio()
1075 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) { in wb_position_ratio()
1078 if (dtc->wb_dirty < 8) { in wb_position_ratio()
1079 dtc->pos_ratio = min_t(long long, pos_ratio * 2, in wb_position_ratio()
1084 if (dtc->wb_dirty >= wb_thresh) in wb_position_ratio()
1088 dtc->wb_bg_thresh); in wb_position_ratio()
1093 wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty, in wb_position_ratio()
1103 * wb's) while given strictlimit wb is below limit. in wb_position_ratio()
1106 * but it would look too non-natural for the case of all in wb_position_ratio()
1108 * with bdi->max_ratio == 100%. in wb_position_ratio()
1117 dtc->pos_ratio = min(pos_ratio, wb_pos_ratio); in wb_position_ratio()
1130 * f(wb_dirty) := 1.0 + k * (wb_dirty - wb_setpoint) in wb_position_ratio()
1132 * x_intercept - wb_dirty in wb_position_ratio()
1133 * := -------------------------- in wb_position_ratio()
1134 * x_intercept - wb_setpoint in wb_position_ratio()
1139 * (2) k = - 1 / (8 * write_bw) (in single wb case) in wb_position_ratio()
1144 * [wb_setpoint - write_bw/2, wb_setpoint + write_bw/2] in wb_position_ratio()
1152 if (unlikely(wb_thresh > dtc->thresh)) in wb_position_ratio()
1153 wb_thresh = dtc->thresh; in wb_position_ratio()
1161 wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8); in wb_position_ratio()
1166 x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1); in wb_position_ratio()
1170 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case. in wb_position_ratio()
1172 * wb_thresh thresh - wb_thresh in wb_position_ratio()
1173 * span = --------- * (8 * write_bw) + ------------------ * wb_thresh in wb_position_ratio()
1176 span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16; in wb_position_ratio()
1179 if (dtc->wb_dirty < x_intercept - span / 4) { in wb_position_ratio()
1180 pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty), in wb_position_ratio()
1181 (x_intercept - wb_setpoint) | 1); in wb_position_ratio()
1191 if (dtc->wb_dirty < x_intercept) { in wb_position_ratio()
1192 if (dtc->wb_dirty > x_intercept / 8) in wb_position_ratio()
1194 dtc->wb_dirty); in wb_position_ratio()
1199 dtc->pos_ratio = pos_ratio; in wb_position_ratio()
1207 unsigned long avg = wb->avg_write_bandwidth; in wb_update_write_bandwidth()
1208 unsigned long old = wb->write_bandwidth; in wb_update_write_bandwidth()
1214 * bw * elapsed + write_bandwidth * (period - elapsed) in wb_update_write_bandwidth()
1215 * write_bandwidth = --------------------------------------------------- in wb_update_write_bandwidth()
1221 bw = written - min(written, wb->written_stamp); in wb_update_write_bandwidth()
1228 bw += (u64)wb->write_bandwidth * (period - elapsed); in wb_update_write_bandwidth()
1235 avg -= (avg - old) >> 3; in wb_update_write_bandwidth()
1238 avg += (old - avg) >> 3; in wb_update_write_bandwidth()
1244 long delta = avg - wb->avg_write_bandwidth; in wb_update_write_bandwidth()
1246 &wb->bdi->tot_write_bandwidth) <= 0); in wb_update_write_bandwidth()
1248 wb->write_bandwidth = bw; in wb_update_write_bandwidth()
1249 WRITE_ONCE(wb->avg_write_bandwidth, avg); in wb_update_write_bandwidth()
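
wb_update_write_bandwidth() above keeps a per-wb estimate of write bandwidth: the rate observed over the last elapsed jiffies is blended into the previous estimate, weighted by how much of the averaging period has passed, and a second, smoother average then steps 1/8 of the way toward the result. A simplified sketch (HZ, the period and the sample values are assumptions, and the kernel's extra guard conditions on the second average are omitted):

#include <stdio.h>
#include <stdint.h>

#define HZ 250UL

int main(void)
{
	unsigned long period = 1024;             /* jiffies, power of two        */
	unsigned long elapsed = 200;             /* jiffies since the last sample */
	unsigned long pages_written = 24000;     /* written back in that window  */
	unsigned long write_bandwidth = 25000;   /* previous estimate, pages/s   */
	unsigned long avg = 25000;               /* smoothed estimate, pages/s   */

	uint64_t bw = (uint64_t)pages_written * HZ / elapsed;   /* rate right now */

	/* (bw * elapsed + old * (period - elapsed)) / period */
	write_bandwidth = (bw * elapsed +
			   (uint64_t)write_bandwidth * (period - elapsed)) / period;

	if (avg > write_bandwidth)
		avg -= (avg - write_bandwidth) / 8;   /* move 1/8 of the way */
	else
		avg += (write_bandwidth - avg) / 8;

	printf("write_bandwidth=%lu avg=%lu pages/s\n", write_bandwidth, avg);
	return 0;
}
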
1255 unsigned long thresh = dtc->thresh; in update_dirty_limit()
1256 unsigned long limit = dom->dirty_limit; in update_dirty_limit() local
1261 if (limit < thresh) { in update_dirty_limit()
1262 limit = thresh; in update_dirty_limit()
1269 * dom->dirty_limit which is guaranteed to lie above the dirty pages. in update_dirty_limit()
1271 thresh = max(thresh, dtc->dirty); in update_dirty_limit()
1272 if (limit > thresh) { in update_dirty_limit()
1273 limit -= (limit - thresh) >> 5; in update_dirty_limit()
1278 dom->dirty_limit = limit; in update_dirty_limit()
1289 if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) in domain_update_dirty_limit()
1292 spin_lock(&dom->lock); in domain_update_dirty_limit()
1293 if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) { in domain_update_dirty_limit()
1295 dom->dirty_limit_tstamp = now; in domain_update_dirty_limit()
1297 spin_unlock(&dom->lock); in domain_update_dirty_limit()
1301 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
1310 struct bdi_writeback *wb = dtc->wb; in wb_update_dirty_ratelimit()
1311 unsigned long dirty = dtc->dirty; in wb_update_dirty_ratelimit()
1312 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh); in wb_update_dirty_ratelimit()
1313 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh); in wb_update_dirty_ratelimit() local
1314 unsigned long setpoint = (freerun + limit) / 2; in wb_update_dirty_ratelimit()
1315 unsigned long write_bw = wb->avg_write_bandwidth; in wb_update_dirty_ratelimit()
1316 unsigned long dirty_ratelimit = wb->dirty_ratelimit; in wb_update_dirty_ratelimit()
1326 * when dirty pages are truncated by userspace or re-dirtied by FS. in wb_update_dirty_ratelimit()
1328 dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed; in wb_update_dirty_ratelimit()
1334 dtc->pos_ratio >> RATELIMIT_CALC_SHIFT; in wb_update_dirty_ratelimit()
1341 * formula will yield the balanced rate limit (write_bw / N). in wb_update_dirty_ratelimit()
1378 * wb->dirty_ratelimit = balanced_dirty_ratelimit; in wb_update_dirty_ratelimit()
1382 * limit the step size. in wb_update_dirty_ratelimit()
1386 * task_ratelimit - dirty_ratelimit in wb_update_dirty_ratelimit()
1387 * = (pos_ratio - 1) * dirty_ratelimit in wb_update_dirty_ratelimit()
1396 * - dirty_ratelimit > balanced_dirty_ratelimit in wb_update_dirty_ratelimit()
1397 * - dirty_ratelimit > task_ratelimit (dirty pages are above setpoint) in wb_update_dirty_ratelimit()
1403 * |task_ratelimit - dirty_ratelimit| is used to limit the step size in wb_update_dirty_ratelimit()
1422 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) { in wb_update_dirty_ratelimit()
1423 dirty = dtc->wb_dirty; in wb_update_dirty_ratelimit()
1424 if (dtc->wb_dirty < 8) in wb_update_dirty_ratelimit()
1425 setpoint = dtc->wb_dirty + 1; in wb_update_dirty_ratelimit()
1427 setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2; in wb_update_dirty_ratelimit()
1431 x = min3(wb->balanced_dirty_ratelimit, in wb_update_dirty_ratelimit()
1434 step = x - dirty_ratelimit; in wb_update_dirty_ratelimit()
1436 x = max3(wb->balanced_dirty_ratelimit, in wb_update_dirty_ratelimit()
1439 step = dirty_ratelimit - x; in wb_update_dirty_ratelimit()
1456 dirty_ratelimit -= step; in wb_update_dirty_ratelimit()
1458 WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL)); in wb_update_dirty_ratelimit()
1459 wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit; in wb_update_dirty_ratelimit()
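
wb_update_dirty_ratelimit() above adjusts the base throttle rate: the rate tasks were actually limited to is dirty_ratelimit scaled by pos_ratio, the rate that would balance dirtying against writeout is that value scaled by write_bw/dirty_rate, and the base rate is then stepped toward the balanced one. A heavily simplified sketch (the kernel's step-size limiting, strictlimit handling and three-way min/max logic are left out; all numbers are made up):

#include <stdio.h>
#include <stdint.h>

#define SHIFT 10   /* assumed RATELIMIT_CALC_SHIFT */

int main(void)
{
	unsigned long dirty_ratelimit = 20000;   /* base rate, pages/s            */
	unsigned long pos_ratio = 896;           /* 0.875 in 10-bit fixed point   */
	unsigned long write_bw = 25000;          /* pages/s being written back    */
	unsigned long dirty_rate = 30000;        /* pages/s being dirtied         */

	unsigned long task_ratelimit =
		(uint64_t)dirty_ratelimit * pos_ratio >> SHIFT;
	unsigned long balanced =
		(uint64_t)task_ratelimit * write_bw / (dirty_rate | 1);

	/* move 1/8 of the distance toward the balanced rate */
	if (dirty_ratelimit > balanced)
		dirty_ratelimit -= (dirty_ratelimit - balanced) / 8;
	else
		dirty_ratelimit += (balanced - dirty_ratelimit) / 8;

	printf("task_ratelimit=%lu balanced=%lu new base=%lu\n",
	       task_ratelimit, balanced, dirty_ratelimit);
	return 0;
}
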
1468 struct bdi_writeback *wb = gdtc->wb; in __wb_update_bandwidth()
1474 spin_lock(&wb->list_lock); in __wb_update_bandwidth()
1482 elapsed = max(now - wb->bw_time_stamp, 1UL); in __wb_update_bandwidth()
1483 dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]); in __wb_update_bandwidth()
1484 written = percpu_counter_read(&wb->stat[WB_WRITTEN]); in __wb_update_bandwidth()
1501 wb->dirtied_stamp = dirtied; in __wb_update_bandwidth()
1502 wb->written_stamp = written; in __wb_update_bandwidth()
1503 WRITE_ONCE(wb->bw_time_stamp, now); in __wb_update_bandwidth()
1504 spin_unlock(&wb->list_lock); in __wb_update_bandwidth()
1520 unsigned long elapsed = now - READ_ONCE(wb->bw_time_stamp); in wb_bandwidth_estimate_start()
1523 !atomic_read(&wb->writeback_inodes)) { in wb_bandwidth_estimate_start()
1524 spin_lock(&wb->list_lock); in wb_bandwidth_estimate_start()
1525 wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED); in wb_bandwidth_estimate_start()
1526 wb->written_stamp = wb_stat(wb, WB_WRITTEN); in wb_bandwidth_estimate_start()
1527 WRITE_ONCE(wb->bw_time_stamp, now); in wb_bandwidth_estimate_start()
1528 spin_unlock(&wb->list_lock); in wb_bandwidth_estimate_start()
1537 * global_zone_page_state() too often. So scale it near-sqrt to the safety margin
1544 return 1UL << (ilog2(thresh - dirty) >> 1); in dirty_poll_interval()
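
dirty_poll_interval() above spaces out the limit checks roughly with the square root of the distance to the threshold, so tasks far below the limit pay almost nothing for the accounting. A small sketch (ilog2_ul() is a stand-in for the kernel's ilog2()):

#include <stdio.h>

static unsigned long ilog2_ul(unsigned long v)   /* stand-in helper */
{
	unsigned long r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned long thresh = 100000, dirty = 36000;
	unsigned long gap = thresh - dirty;                 /* 64000 pages      */
	unsigned long interval = 1UL << (ilog2_ul(gap) >> 1); /* "near-sqrt": 128 */

	printf("gap=%lu -> recheck every %lu dirtied pages\n", gap, interval);
	return 0;
}
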
1552 unsigned long bw = READ_ONCE(wb->avg_write_bandwidth); in wb_max_pause()
1556 * Limit pause time for small memory systems. If sleeping for too long in wb_max_pause()
1574 long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth)); in wb_min_pause()
1575 long lo = ilog2(READ_ONCE(wb->dirty_ratelimit)); in wb_min_pause()
1580 /* target for 10ms pause on 1-dd case */ in wb_min_pause()
1590 t += (hi - lo) * (10 * HZ) / 1024; in wb_min_pause()
1606 * 2) limit the target pause time to max_pause/2, so that the normal in wb_min_pause()
1615 * case fio-mmap-randwrite-64k, which does 16*{sync read, async write}. in wb_min_pause()
1645 struct bdi_writeback *wb = dtc->wb; in wb_dirty_limits()
1651 * - in JBOD setup, wb_thresh can fluctuate a lot in wb_dirty_limits()
1652 * - in a system with HDD and USB key, the USB key may somehow in wb_dirty_limits()
1661 dtc->wb_thresh = __wb_calc_thresh(dtc); in wb_dirty_limits()
1662 dtc->wb_bg_thresh = dtc->thresh ? in wb_dirty_limits()
1663 div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; in wb_dirty_limits()
1671 * reported dirty, even though there are thresh-m pages in wb_dirty_limits()
1675 if (dtc->wb_thresh < 2 * wb_stat_error()) { in wb_dirty_limits()
1677 dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK); in wb_dirty_limits()
1680 dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK); in wb_dirty_limits()
1709 struct backing_dev_info *bdi = wb->bdi; in balance_dirty_pages()
1710 bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT; in balance_dirty_pages()
1722 gdtc->avail = global_dirtyable_memory(); in balance_dirty_pages()
1723 gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK); in balance_dirty_pages()
1730 dirty = gdtc->wb_dirty; in balance_dirty_pages()
1731 thresh = gdtc->wb_thresh; in balance_dirty_pages()
1732 bg_thresh = gdtc->wb_bg_thresh; in balance_dirty_pages()
1734 dirty = gdtc->dirty; in balance_dirty_pages()
1735 thresh = gdtc->thresh; in balance_dirty_pages()
1736 bg_thresh = gdtc->bg_thresh; in balance_dirty_pages()
1747 &mdtc->dirty, &writeback); in balance_dirty_pages()
1748 mdtc->dirty += writeback; in balance_dirty_pages()
1755 m_dirty = mdtc->wb_dirty; in balance_dirty_pages()
1756 m_thresh = mdtc->wb_thresh; in balance_dirty_pages()
1757 m_bg_thresh = mdtc->wb_bg_thresh; in balance_dirty_pages()
1759 m_dirty = mdtc->dirty; in balance_dirty_pages()
1760 m_thresh = mdtc->thresh; in balance_dirty_pages()
1761 m_bg_thresh = mdtc->bg_thresh; in balance_dirty_pages()
1774 if (!laptop_mode && nr_reclaimable > gdtc->bg_thresh && in balance_dirty_pages()
1780 * catch-up. This avoids (excessively) small writeouts in balance_dirty_pages()
1785 * up are the price we consciously pay for strictlimit-ing. in balance_dirty_pages()
1800 current->dirty_paused_when = now; in balance_dirty_pages()
1801 current->nr_dirtied = 0; in balance_dirty_pages()
1804 current->nr_dirtied_pause = min(intv, m_intv); in balance_dirty_pages()
1821 if ((current->flags & PF_LOCAL_THROTTLE) && in balance_dirty_pages()
1822 gdtc->wb_dirty < in balance_dirty_pages()
1823 dirty_freerun_ceiling(gdtc->wb_thresh, in balance_dirty_pages()
1824 gdtc->wb_bg_thresh)) in balance_dirty_pages()
1827 * when below the per-wb freerun ceiling. in balance_dirty_pages()
1832 dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) && in balance_dirty_pages()
1833 ((gdtc->dirty > gdtc->thresh) || strictlimit); in balance_dirty_pages()
1848 if ((current->flags & PF_LOCAL_THROTTLE) && in balance_dirty_pages()
1849 mdtc->wb_dirty < in balance_dirty_pages()
1850 dirty_freerun_ceiling(mdtc->wb_thresh, in balance_dirty_pages()
1851 mdtc->wb_bg_thresh)) in balance_dirty_pages()
1854 * throttled when below the per-wb in balance_dirty_pages()
1859 dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) && in balance_dirty_pages()
1860 ((mdtc->dirty > mdtc->thresh) || strictlimit); in balance_dirty_pages()
1863 if (mdtc->pos_ratio < gdtc->pos_ratio) in balance_dirty_pages()
1867 if (dirty_exceeded != wb->dirty_exceeded) in balance_dirty_pages()
1868 wb->dirty_exceeded = dirty_exceeded; in balance_dirty_pages()
1870 if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) + in balance_dirty_pages()
1875 dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit); in balance_dirty_pages()
1876 task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >> in balance_dirty_pages()
1878 max_pause = wb_max_pause(wb, sdtc->wb_dirty); in balance_dirty_pages()
1890 if (current->dirty_paused_when) in balance_dirty_pages()
1891 pause -= now - current->dirty_paused_when; in balance_dirty_pages()
1894 * for up to 800ms from time to time on 1-HDD; so does xfs, in balance_dirty_pages()
1901 sdtc->thresh, in balance_dirty_pages()
1902 sdtc->bg_thresh, in balance_dirty_pages()
1903 sdtc->dirty, in balance_dirty_pages()
1904 sdtc->wb_thresh, in balance_dirty_pages()
1905 sdtc->wb_dirty, in balance_dirty_pages()
1912 if (pause < -HZ) { in balance_dirty_pages()
1913 current->dirty_paused_when = now; in balance_dirty_pages()
1914 current->nr_dirtied = 0; in balance_dirty_pages()
1916 current->dirty_paused_when += period; in balance_dirty_pages()
1917 current->nr_dirtied = 0; in balance_dirty_pages()
1918 } else if (current->nr_dirtied_pause <= pages_dirtied) in balance_dirty_pages()
1919 current->nr_dirtied_pause += pages_dirtied; in balance_dirty_pages()
1924 now += min(pause - max_pause, max_pause); in balance_dirty_pages()
1930 sdtc->thresh, in balance_dirty_pages()
1931 sdtc->bg_thresh, in balance_dirty_pages()
1932 sdtc->dirty, in balance_dirty_pages()
1933 sdtc->wb_thresh, in balance_dirty_pages()
1934 sdtc->wb_dirty, in balance_dirty_pages()
1942 ret = -EAGAIN; in balance_dirty_pages()
1946 bdi->last_bdp_sleep = jiffies; in balance_dirty_pages()
1949 current->dirty_paused_when = now + pause; in balance_dirty_pages()
1950 current->nr_dirtied = 0; in balance_dirty_pages()
1951 current->nr_dirtied_pause = nr_dirtied_pause; in balance_dirty_pages()
1965 * In theory 1 page is enough to keep the consumer-producer in balance_dirty_pages()
1970 if (sdtc->wb_dirty <= wb_stat_error()) in balance_dirty_pages()
1973 if (fatal_signal_pending(current)) in balance_dirty_pages()
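
Inside the balance_dirty_pages() loop above, the sleep is sized as the time "owed" for the pages just dirtied at the task's ratelimit, minus the time already spent since the last pause, clamped between a minimum and a maximum pause. A simplified sketch with assumed inputs (when the computed pause falls below the minimum, the real code skips the sleep and raises nr_dirtied_pause rather than clamping upward as done here):

#include <stdio.h>

#define HZ 250L

int main(void)
{
	long pages_dirtied = 512;        /* dirtied since the last throttle point */
	long task_ratelimit = 16000;     /* pages/s this task may dirty           */
	long already_elapsed = 2;        /* jiffies spent since the last pause    */
	long min_pause = 2, max_pause = 50;   /* jiffies, assumed clamps          */

	long period = HZ * pages_dirtied / task_ratelimit;  /* time owed: 8 jiffies */
	long pause = period - already_elapsed;

	if (pause > max_pause)
		pause = max_pause;
	if (pause < min_pause)
		pause = min_pause;   /* simplification, see note above */

	printf("sleep for %ld jiffies (%ld ms)\n", pause, pause * 1000 / HZ);
	return 0;
}
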
1984 * dirty tsk->nr_dirtied_pause pages;
1988 * (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never be
1990 * throttled page dirties in dirty_throttle_leaks on task exit and charge them
1998 * balance_dirty_pages_ratelimited_flags - Balance dirty memory state.
1999 * @mapping: address_space which was dirtied.
2008 * Return: If @flags contains BDP_ASYNC, it may return -EAGAIN to
2014 int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, in balance_dirty_pages_ratelimited_flags() argument
2017 struct inode *inode = mapping->host; in balance_dirty_pages_ratelimited_flags()
2024 if (!(bdi->capabilities & BDI_CAP_WRITEBACK)) in balance_dirty_pages_ratelimited_flags()
2030 wb = &bdi->wb; in balance_dirty_pages_ratelimited_flags()
2032 ratelimit = current->nr_dirtied_pause; in balance_dirty_pages_ratelimited_flags()
2033 if (wb->dirty_exceeded) in balance_dirty_pages_ratelimited_flags()
2034 ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10)); in balance_dirty_pages_ratelimited_flags()
2041 * time, hence all honoured too large initial task->nr_dirtied_pause. in balance_dirty_pages_ratelimited_flags()
2044 if (unlikely(current->nr_dirtied >= ratelimit)) in balance_dirty_pages_ratelimited_flags()
2052 * short-lived tasks (eg. gcc invocations in a kernel build) escaping in balance_dirty_pages_ratelimited_flags()
2053 * the dirty throttling and livelock other long-run dirtiers. in balance_dirty_pages_ratelimited_flags()
2056 if (*p > 0 && current->nr_dirtied < ratelimit) { in balance_dirty_pages_ratelimited_flags()
2058 nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied); in balance_dirty_pages_ratelimited_flags()
2059 *p -= nr_pages_dirtied; in balance_dirty_pages_ratelimited_flags()
2060 current->nr_dirtied += nr_pages_dirtied; in balance_dirty_pages_ratelimited_flags()
2064 if (unlikely(current->nr_dirtied >= ratelimit)) in balance_dirty_pages_ratelimited_flags()
2065 ret = balance_dirty_pages(wb, current->nr_dirtied, flags); in balance_dirty_pages_ratelimited_flags()
2073 * balance_dirty_pages_ratelimited - balance dirty memory state.
2074 * @mapping: address_space which was dirtied.
2080 * Once we're over the dirty memory limit we decrease the ratelimiting
2081 * by a lot, to prevent individual processes from overshooting the limit
2084 void balance_dirty_pages_ratelimited(struct address_space *mapping) in balance_dirty_pages_ratelimited() argument
2086 balance_dirty_pages_ratelimited_flags(mapping, 0); in balance_dirty_pages_ratelimited()
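
balance_dirty_pages_ratelimited() above is what filesystems call from their buffered-write paths after dirtying pagecache. A minimal sketch of such a call site, not taken from any real filesystem; my_fs_copy_and_dirty() is a hypothetical helper standing in for the filesystem's own copy-and-dirty loop:

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>

/* hypothetical: copies one chunk into pagecache and dirties the folios */
static size_t my_fs_copy_and_dirty(struct address_space *mapping,
				   const char __user *buf, size_t len);

static ssize_t my_fs_buffered_write(struct address_space *mapping,
				    const char __user *buf, size_t len)
{
	size_t done = 0;

	while (done < len) {
		size_t chunk = my_fs_copy_and_dirty(mapping, buf + done,
						    len - done);
		if (!chunk)
			break;
		done += chunk;
		/* may sleep in balance_dirty_pages() once over the limits */
		balance_dirty_pages_ratelimited(mapping);
	}
	return done;
}
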
2091 * wb_over_bg_thresh - does @wb need to be written back?
2113 gdtc->avail = global_dirtyable_memory(); in wb_over_bg_thresh()
2114 gdtc->dirty = global_node_page_state(NR_FILE_DIRTY); in wb_over_bg_thresh()
2117 if (gdtc->dirty > gdtc->bg_thresh) in wb_over_bg_thresh()
2120 thresh = wb_calc_thresh(gdtc->wb, gdtc->bg_thresh); in wb_over_bg_thresh()
2132 mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty, in wb_over_bg_thresh()
2137 if (mdtc->dirty > mdtc->bg_thresh) in wb_over_bg_thresh()
2140 thresh = wb_calc_thresh(mdtc->wb, mdtc->bg_thresh); in wb_over_bg_thresh()
2167 * and a different non-zero value will wakeup the writeback threads. in dirty_writeback_centisecs_handler()
2191 * then push it back - the user is still using the disk.
2195 mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode); in laptop_io_completion()
2210 del_timer(&bdi->laptop_mode_wb_timer); in laptop_sync_completion()
2216 * If ratelimit_pages is too high then we can get into dirty-data overload
2231 dom->dirty_limit = dirty_thresh; in writeback_set_ratelimit()
2328 * is now applied to total non-HIGHPAGE memory, and as such we can't
2331 * non-HIGHMEM memory.
2350 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
2351 * @mapping: address space structure to write
2363 void tag_pages_for_writeback(struct address_space *mapping, in tag_pages_for_writeback() argument
2366 XA_STATE(xas, &mapping->i_pages, start); in tag_pages_for_writeback()
2386 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.

2387 * @mapping: address space structure to write
2388 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2393 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
2394 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
2396 * the call was made get new I/O started against them. If wbc->sync_mode is
2402 * writing them. For data-integrity sync we have to be careful so that we do
2410 * lock/page writeback access order inversion - we should only ever lock
2411 * multiple pages in ascending page->index order, and looping back to the start
2416 int write_cache_pages(struct address_space *mapping, in write_cache_pages() argument
2432 if (wbc->range_cyclic) { in write_cache_pages()
2433 index = mapping->writeback_index; /* prev offset */ in write_cache_pages()
2434 end = -1; in write_cache_pages()
2436 index = wbc->range_start >> PAGE_SHIFT; in write_cache_pages()
2437 end = wbc->range_end >> PAGE_SHIFT; in write_cache_pages()
2438 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) in write_cache_pages()
2441 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) { in write_cache_pages()
2442 tag_pages_for_writeback(mapping, index, end); in write_cache_pages()
2451 nr_folios = filemap_get_folios_tag(mapping, &index, end, in write_cache_pages()
2461 done_index = folio->index; in write_cache_pages()
2473 if (unlikely(folio->mapping != mapping)) { in write_cache_pages()
2485 if (wbc->sync_mode != WB_SYNC_NONE) in write_cache_pages()
2495 trace_wbc_writepage(wbc, inode_to_bdi(mapping->host)); in write_cache_pages()
2514 } else if (wbc->sync_mode != WB_SYNC_ALL) { in write_cache_pages()
2516 done_index = folio->index + nr; in write_cache_pages()
2530 wbc->nr_to_write -= nr; in write_cache_pages()
2531 if (wbc->nr_to_write <= 0 && in write_cache_pages()
2532 wbc->sync_mode == WB_SYNC_NONE) { in write_cache_pages()
2546 if (wbc->range_cyclic && !done) in write_cache_pages()
2548 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) in write_cache_pages()
2549 mapping->writeback_index = done_index; in write_cache_pages()
2558 struct address_space *mapping = data; in writepage_cb() local
2559 int ret = mapping->a_ops->writepage(&folio->page, wbc); in writepage_cb()
2560 mapping_set_error(mapping, ret); in writepage_cb()
2564 int do_writepages(struct address_space *mapping, struct writeback_control *wbc) in do_writepages() argument
2569 if (wbc->nr_to_write <= 0) in do_writepages()
2571 wb = inode_to_wb_wbc(mapping->host, wbc); in do_writepages()
2574 if (mapping->a_ops->writepages) { in do_writepages()
2575 ret = mapping->a_ops->writepages(mapping, wbc); in do_writepages()
2576 } else if (mapping->a_ops->writepage) { in do_writepages()
2580 ret = write_cache_pages(mapping, wbc, writepage_cb, in do_writepages()
2581 mapping); in do_writepages()
2587 if (ret != -ENOMEM || wbc->sync_mode != WB_SYNC_ALL) in do_writepages()
2604 if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) + in do_writepages()
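
do_writepages() above falls back to write_cache_pages() with writepage_cb when a filesystem only provides ->writepage. Filesystems can also call write_cache_pages() directly from their own ->writepages; a minimal sketch, assuming the folio-based writepage_t callback that writepage_cb uses, with my_fs_write_folio() as a hypothetical per-folio writer:

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>

/* hypothetical: writes one dirty folio, returns 0 or an error */
static int my_fs_write_folio(struct folio *folio,
			     struct writeback_control *wbc, void *data);

static int my_fs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	/* walks dirty folios in index order, honouring wbc->nr_to_write */
	return write_cache_pages(mapping, wbc, my_fs_write_folio, mapping);
}
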
2613 bool noop_dirty_folio(struct address_space *mapping, struct folio *folio) in noop_dirty_folio() argument
2629 struct address_space *mapping) in folio_account_dirtied() argument
2631 struct inode *inode = mapping->host; in folio_account_dirtied()
2633 trace_writeback_dirty_folio(folio, mapping); in folio_account_dirtied()
2635 if (mapping_can_writeback(mapping)) { in folio_account_dirtied()
2648 current->nr_dirtied += nr; in folio_account_dirtied()
2664 lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); in folio_account_cleaned()
2665 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); in folio_account_cleaned()
2666 wb_stat_mod(wb, WB_RECLAIMABLE, -nr); in folio_account_cleaned()
2683 void __folio_mark_dirty(struct folio *folio, struct address_space *mapping, in __folio_mark_dirty() argument
2688 xa_lock_irqsave(&mapping->i_pages, flags); in __folio_mark_dirty()
2689 if (folio->mapping) { /* Race with truncate? */ in __folio_mark_dirty()
2691 folio_account_dirtied(folio, mapping); in __folio_mark_dirty()
2692 __xa_set_mark(&mapping->i_pages, folio_index(folio), in __folio_mark_dirty()
2695 xa_unlock_irqrestore(&mapping->i_pages, flags); in __folio_mark_dirty()
2699 * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads.
2700 * @mapping: Address space this folio belongs to.
2710 * that case, but not all the buffers. This is a "bottom-up" dirtying,
2711 * whereas block_dirty_folio() is a "top-down" dirtying.
2717 bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio) in filemap_dirty_folio() argument
2725 __folio_mark_dirty(folio, mapping, !folio_test_private(folio)); in filemap_dirty_folio()
2728 if (mapping->host) { in filemap_dirty_folio()
2730 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); in filemap_dirty_folio()
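
As the comment above notes, filemap_dirty_folio() is the dirty_folio implementation for filesystems that do not use buffer heads. A sketch of how it is typically wired into the address_space operations (the other methods are filesystem-specific and omitted):

#include <linux/fs.h>
#include <linux/pagemap.h>

static const struct address_space_operations my_fs_aops = {
	.dirty_folio	= filemap_dirty_folio,   /* tag the folio and mark the inode dirty */
	/* .read_folio, .writepages, .invalidate_folio, ... */
};
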
2737 * folio_redirty_for_writepage - Decline to write a dirty folio.
2751 struct address_space *mapping = folio->mapping; in folio_redirty_for_writepage() local
2755 wbc->pages_skipped += nr; in folio_redirty_for_writepage()
2756 ret = filemap_dirty_folio(mapping, folio); in folio_redirty_for_writepage()
2757 if (mapping && mapping_can_writeback(mapping)) { in folio_redirty_for_writepage()
2758 struct inode *inode = mapping->host; in folio_redirty_for_writepage()
2763 current->nr_dirtied -= nr; in folio_redirty_for_writepage()
2764 node_stat_mod_folio(folio, NR_DIRTIED, -nr); in folio_redirty_for_writepage()
2765 wb_stat_mod(wb, WB_DIRTIED, -nr); in folio_redirty_for_writepage()
2773 * folio_mark_dirty - Mark a folio as being modified.
2781 * unmaps pages before removing the folio from its mapping.
2787 struct address_space *mapping = folio_mapping(folio); in folio_mark_dirty() local
2789 if (likely(mapping)) { in folio_mark_dirty()
2803 return mapping->a_ops->dirty_folio(mapping, folio); in folio_mark_dirty()
2806 return noop_dirty_folio(mapping, folio); in folio_mark_dirty()
2812 * page->mapping->host, and if the page is unlocked. This is because another
2813 * CPU could truncate the page off the mapping and then free the mapping.
2815 * Usually, the page _is_ locked, or the caller is a user-space process which
2846 struct address_space *mapping = folio_mapping(folio); in __folio_cancel_dirty() local
2848 if (mapping_can_writeback(mapping)) { in __folio_cancel_dirty()
2849 struct inode *inode = mapping->host; in __folio_cancel_dirty()
2873 * write-for-sync can discover it via a PAGECACHE_TAG_DIRTY walk.
2874 * The ->writepage implementation will run either folio_start_writeback()
2883 struct address_space *mapping = folio_mapping(folio); in folio_clear_dirty_for_io() local
2888 if (mapping && mapping_can_writeback(mapping)) { in folio_clear_dirty_for_io()
2889 struct inode *inode = mapping->host; in folio_clear_dirty_for_io()
2898 * (b) we tell the low-level filesystem to in folio_clear_dirty_for_io()
2909 * has no effect on the actual dirty bit - since in folio_clear_dirty_for_io()
2931 lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); in folio_clear_dirty_for_io()
2932 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); in folio_clear_dirty_for_io()
2933 wb_stat_mod(wb, WB_RECLAIMABLE, -nr); in folio_clear_dirty_for_io()
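
The comments above describe the per-folio handoff from "dirty" to "under writeback". A sketch of that sequence as a writer might perform it: take the folio lock, move the dirty state to writeback, submit the I/O, and mark writeback done on completion. my_fs_submit_folio() is hypothetical, and real filesystems call folio_end_writeback() asynchronously from their bio end_io handler rather than inline as pretended here:

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>

static int my_fs_submit_folio(struct folio *folio);   /* hypothetical I/O submit */

static int my_fs_writeback_one(struct folio *folio)
{
	int err = 0;

	folio_lock(folio);
	if (folio_clear_dirty_for_io(folio)) {  /* was dirty: we own the writeout  */
		folio_start_writeback(folio);   /* sets writeback, accounts WB stats */
		folio_unlock(folio);
		err = my_fs_submit_folio(folio);
		folio_end_writeback(folio);     /* pretend the I/O completed here   */
	} else {
		folio_unlock(folio);            /* someone else already cleaned it  */
	}
	return err;
}
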
2945 atomic_inc(&wb->writeback_inodes); in wb_inode_writeback_start()
2951 atomic_dec(&wb->writeback_inodes); in wb_inode_writeback_end()
2959 spin_lock_irqsave(&wb->work_lock, flags); in wb_inode_writeback_end()
2960 if (test_bit(WB_registered, &wb->state)) in wb_inode_writeback_end()
2961 queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); in wb_inode_writeback_end()
2962 spin_unlock_irqrestore(&wb->work_lock, flags); in wb_inode_writeback_end()
2968 struct address_space *mapping = folio_mapping(folio); in __folio_end_writeback() local
2972 if (mapping && mapping_use_writeback_tags(mapping)) { in __folio_end_writeback()
2973 struct inode *inode = mapping->host; in __folio_end_writeback()
2977 xa_lock_irqsave(&mapping->i_pages, flags); in __folio_end_writeback()
2980 __xa_clear_mark(&mapping->i_pages, folio_index(folio), in __folio_end_writeback()
2982 if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { in __folio_end_writeback()
2985 wb_stat_mod(wb, WB_WRITEBACK, -nr); in __folio_end_writeback()
2987 if (!mapping_tagged(mapping, in __folio_end_writeback()
2993 if (mapping->host && !mapping_tagged(mapping, in __folio_end_writeback()
2995 sb_clear_inode_writeback(mapping->host); in __folio_end_writeback()
2997 xa_unlock_irqrestore(&mapping->i_pages, flags); in __folio_end_writeback()
3002 lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr); in __folio_end_writeback()
3003 zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); in __folio_end_writeback()
3013 struct address_space *mapping = folio_mapping(folio); in __folio_start_writeback() local
3018 if (mapping && mapping_use_writeback_tags(mapping)) { in __folio_start_writeback()
3019 XA_STATE(xas, &mapping->i_pages, folio_index(folio)); in __folio_start_writeback()
3020 struct inode *inode = mapping->host; in __folio_start_writeback()
3030 on_wblist = mapping_tagged(mapping, in __folio_start_writeback()
3034 if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { in __folio_start_writeback()
3047 if (mapping->host && !on_wblist) in __folio_start_writeback()
3048 sb_mark_inode_writeback(mapping->host); in __folio_start_writeback()
3075 * folio_wait_writeback - Wait for a folio to finish writeback.
3096 * folio_wait_writeback_killable - Wait for a folio to finish writeback.
3106 * Return: 0 on success, -EINTR if we get a fatal signal while waiting.
3113 return -EINTR; in folio_wait_writeback_killable()
3121 * folio_wait_stable() - wait for writeback to finish, if necessary.