// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 */

#include <asm/page.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/radix-tree.h>
#include <linux/rmap.h>
#include <linux/slab.h>
#include <linux/oom.h> /* find_lock_task_mm */

#include <linux/mm_purgeable.h>

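/*
 * Purgeable-memory bookkeeping ("uxpte" = user-extended PTE).
 *
 * Each mm carries a lazily allocated radix tree (uxpgd) that maps a
 * uxpte page number to a page holding an array of 8-byte uxpte entries,
 * one entry per virtual page of purgeable memory.
 *
 * Entry encoding (see the macros below):
 *   bit 0     - present bit
 *   bits 63:1 - count of user pins on the page
 *   UXPTE_UNDER_RECLAIM (-2) - sentinel written by lock_uxpte() while
 *                              the kernel reclaims the page
 *
 * Example: an entry value of 0x7 (0b111) is present with a pin count of
 * 3, since uxpte_present(0x7) == 1 and uxpte_refcnt(0x7) == 3.
 */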
struct uxpte_t {
	atomic64_t val;
};

#define UXPTE_SIZE_SHIFT 3
#define UXPTE_SIZE (1 << UXPTE_SIZE_SHIFT)

#define UXPTE_PER_PAGE_SHIFT (PAGE_SHIFT - UXPTE_SIZE_SHIFT)
#define UXPTE_PER_PAGE (1 << UXPTE_PER_PAGE_SHIFT)

#define UXPTE_PRESENT_BIT 1
#define UXPTE_PRESENT_MASK ((1 << UXPTE_PRESENT_BIT) - 1)
#define UXPTE_REFCNT_ONE (1 << UXPTE_PRESENT_BIT)
#define UXPTE_UNDER_RECLAIM (-UXPTE_REFCNT_ONE)

#define vpn(vaddr) ((vaddr) >> PAGE_SHIFT)
#define uxpte_pn(vaddr) (vpn(vaddr) >> UXPTE_PER_PAGE_SHIFT)
#define uxpte_off(vaddr) (vpn(vaddr) & (UXPTE_PER_PAGE - 1))
#define uxpn2addr(uxpn) ((uxpn) << (UXPTE_PER_PAGE_SHIFT + PAGE_SHIFT))
#define uxpte_refcnt(uxpte) ((uxpte) >> UXPTE_PRESENT_BIT)
#define uxpte_present(uxpte) ((uxpte) & UXPTE_PRESENT_MASK)

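/*
 * Entries are accessed with atomic64 operations and cmpxchg retry loops
 * even though callers hold uxpgd_lock, presumably because the uxpte
 * pages are also mapped into user space by do_uxpte_page_fault() and can
 * therefore be updated concurrently from outside the kernel.
 */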
static inline long uxpte_read(struct uxpte_t *uxpte)
{
	return atomic64_read(&uxpte->val);
}

static inline void uxpte_set(struct uxpte_t *uxpte, long val)
{
	atomic64_set(&uxpte->val, val);
}

static inline bool uxpte_cas(struct uxpte_t *uxpte, long old, long new)
{
	return atomic64_cmpxchg(&uxpte->val, old, new) == old;
}

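/* Reset the per-mm uxpgd state; the radix tree itself is allocated lazily. */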
void mm_init_uxpgd(struct mm_struct *mm)
{
	mm->uxpgd = NULL;
	spin_lock_init(&mm->uxpgd_lock);
}

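/*
 * Tear down the uxpgd: drop the reference held on every uxpte page and
 * free the radix tree root. kfree(NULL) is a no-op, so the not-allocated
 * case simply falls through.
 */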
void mm_clear_uxpgd(struct mm_struct *mm)
{
	struct page *page = NULL;
	void **slot = NULL;
	struct radix_tree_iter iter;

	spin_lock(&mm->uxpgd_lock);
	if (!mm->uxpgd)
		goto out;
	radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) {
		page = radix_tree_delete(mm->uxpgd, iter.index);
		put_page(page);
	}
out:
	kfree(mm->uxpgd);
	mm->uxpgd = NULL;
	spin_unlock(&mm->uxpgd_lock);
}

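/*
 * Find the uxpte page covering @addr, optionally allocating it (and the
 * uxpgd radix tree) on demand. The GFP_KERNEL allocations are done with
 * uxpgd_lock dropped, so after retaking the lock the result is rechecked:
 * if another thread installed a tree or page in the meantime, the local
 * copy is freed and the winner's copy is used.
 */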
/* Caller must hold uxpgd_lock. */
static struct page *lookup_uxpte_page(struct vm_area_struct *vma,
	unsigned long addr, bool alloc)
{
	struct radix_tree_root *uxpgd = NULL;
	struct page *page = NULL;
	struct page *new_page = NULL;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long uxpn = uxpte_pn(addr);

	if (mm->uxpgd)
		goto lookup;
	if (!alloc)
		goto out;
	spin_unlock(&mm->uxpgd_lock);
	uxpgd = kzalloc(sizeof(struct radix_tree_root), GFP_KERNEL);
	if (!uxpgd) {
		pr_err("uxpgd alloc failed.\n");
		spin_lock(&mm->uxpgd_lock);
		goto out;
	}
	INIT_RADIX_TREE(uxpgd, GFP_KERNEL);
	spin_lock(&mm->uxpgd_lock);
	if (mm->uxpgd)
		kfree(uxpgd);
	else
		mm->uxpgd = uxpgd;
lookup:
	page = radix_tree_lookup(mm->uxpgd, uxpn);
	if (page)
		goto out;
	if (!alloc)
		goto out;
	spin_unlock(&mm->uxpgd_lock);
	new_page = alloc_zeroed_user_highpage_movable(vma, addr);
	if (!new_page) {
		pr_err("uxpte page alloc failed.\n");
		spin_lock(&mm->uxpgd_lock);
		goto out;
	}
	if (radix_tree_preload(GFP_KERNEL)) {
		put_page(new_page);
		pr_err("radix preload failed.\n");
		spin_lock(&mm->uxpgd_lock);
		goto out;
	}
	spin_lock(&mm->uxpgd_lock);
	page = radix_tree_lookup(mm->uxpgd, uxpn);
	if (page) {
		put_page(new_page);
	} else {
		page = new_page;
		radix_tree_insert(mm->uxpgd, uxpn, page);
	}
	radix_tree_preload_end();
out:
	return page;
}

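/*
 * Return a pointer to the uxpte entry for @addr within its uxpte page,
 * or NULL if the page does not exist and was not (or could not be)
 * allocated.
 */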
/* Caller must hold uxpgd_lock. */
static struct uxpte_t *lookup_uxpte(struct vm_area_struct *vma,
	unsigned long addr, bool alloc)
{
	struct uxpte_t *uxpte = NULL;
	struct page *page = NULL;

	page = lookup_uxpte_page(vma, addr, alloc);
	if (!page)
		return NULL;
	uxpte = page_to_virt(page);

	return uxpte + uxpte_off(addr);
}

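/*
 * Mark the entry at @addr as under reclaim. This only happens when the
 * entry carries no user pins (refcount zero); the cmpxchg retry copes
 * with concurrent updates to the entry. Returns true if the entry is
 * marked UXPTE_UNDER_RECLAIM on return.
 */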
bool lock_uxpte(struct vm_area_struct *vma, unsigned long addr)
{
	struct uxpte_t *uxpte = NULL;
	long val = 0;

	spin_lock(&vma->vm_mm->uxpgd_lock);
	uxpte = lookup_uxpte(vma, addr, true);
	if (!uxpte)
		goto unlock;
retry:
	val = uxpte_read(uxpte);
	if (val >> 1)
		goto unlock;
	if (!uxpte_cas(uxpte, val, UXPTE_UNDER_RECLAIM))
		goto retry;
	val = UXPTE_UNDER_RECLAIM;
unlock:
	spin_unlock(&vma->vm_mm->uxpgd_lock);

	return val == UXPTE_UNDER_RECLAIM;
}

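/* Release a reclaim lock taken by lock_uxpte(): reset the entry to 0. */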
void unlock_uxpte(struct vm_area_struct *vma, unsigned long addr)
{
	struct uxpte_t *uxpte = NULL;

	spin_lock(&vma->vm_mm->uxpgd_lock);
	uxpte = lookup_uxpte(vma, addr, false);
	if (!uxpte)
		goto unlock;
	uxpte_set(uxpte, 0);
unlock:
	spin_unlock(&vma->vm_mm->uxpgd_lock);
}

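/*
 * Set the present bit for the page at @addr, allocating the uxpte on
 * demand. Returns true if the present bit is set on return (whether it
 * was already set or set here), false if the uxpte could not be allocated.
 */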
bool uxpte_set_present(struct vm_area_struct *vma, unsigned long addr)
{
	struct uxpte_t *uxpte = NULL;
	long val = 0;

	spin_lock(&vma->vm_mm->uxpgd_lock);
	uxpte = lookup_uxpte(vma, addr, true);
	if (!uxpte)
		goto unlock;
retry:
	val = uxpte_read(uxpte);
	if (val & 1)
		goto unlock;
	if (!uxpte_cas(uxpte, val, val + 1))
		goto retry;
	val++;
unlock:
	spin_unlock(&vma->vm_mm->uxpgd_lock);

	return val & 1;
}

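/*
 * Clear the present bit for the page at @addr. A missing uxpte or an
 * already-clear bit is simply ignored.
 */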
void uxpte_clear_present(struct vm_area_struct *vma, unsigned long addr)
{
	struct uxpte_t *uxpte = NULL;
	long val = 0;

	spin_lock(&vma->vm_mm->uxpgd_lock);
	uxpte = lookup_uxpte(vma, addr, false);
	if (!uxpte)
		goto unlock;
retry:
	val = uxpte_read(uxpte);
	if (!(val & 1))
		goto unlock;
	if (!uxpte_cas(uxpte, val, val - 1))
		goto retry;
unlock:
	spin_unlock(&vma->vm_mm->uxpgd_lock);
}

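/*
 * Fault handler for a VMA backed by uxpte pages. The page index within
 * the VMA, offset by vm_pgoff, selects a uxpte page number; that uxpte
 * page is looked up (or allocated) and installed as the PTE, presumably
 * so that user space can read and update pin counts directly.
 */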
vm_fault_t do_uxpte_page_fault(struct vm_fault *vmf, pte_t *entry)
{
	struct vm_area_struct *vma = vmf->vma;
	unsigned long vma_uxpn = vma->vm_pgoff;
	unsigned long off_uxpn = vpn(vmf->address - vma->vm_start);
	unsigned long addr = uxpn2addr(vma_uxpn + off_uxpn);
	struct page *page = NULL;

	if (unlikely(anon_vma_prepare(vma)))
		return VM_FAULT_OOM;

	spin_lock(&vma->vm_mm->uxpgd_lock);
	page = lookup_uxpte_page(vma, addr, true);
	spin_unlock(&vma->vm_mm->uxpgd_lock);

	if (!page)
		return VM_FAULT_OOM;

	*entry = mk_pte(page, vma->vm_page_prot);
	*entry = pte_sw_mkyoung(*entry);
	if (vma->vm_flags & VM_WRITE)
		*entry = pte_mkwrite(pte_mkdirty(*entry));
	return 0;
}

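/*
 * Walk every uxpte page of @mm and count the entries that are present
 * (total purgeable pages) and, among those, the entries with a nonzero
 * refcount (pages pinned by user space).
 */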
static void __mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages,
	unsigned long *pined_purg_pages)
{
	struct page *page = NULL;
	void **slot = NULL;
	struct radix_tree_iter iter;
	struct uxpte_t *uxpte = NULL;
	long pte_entry = 0;
	int index = 0;
	unsigned long nr_total = 0, nr_pined = 0;

	spin_lock(&mm->uxpgd_lock);
	if (!mm->uxpgd)
		goto out;
	radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) {
		page = radix_tree_deref_slot(slot);
		if (unlikely(!page))
			continue;
		uxpte = page_to_virt(page);
		for (index = 0; index < UXPTE_PER_PAGE; index++) {
			pte_entry = uxpte_read(&(uxpte[index]));
			if (uxpte_present(pte_entry) == 0) /* not present */
				continue;
			nr_total++;
			if (uxpte_refcnt(pte_entry) > 0) /* pinned by user */
				nr_pined++;
		}
	}
out:
	spin_unlock(&mm->uxpgd_lock);

	if (total_purg_pages)
		*total_purg_pages = nr_total;

	if (pined_purg_pages)
		*pined_purg_pages = nr_pined;
}

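/* Per-mm purgeable page counters; either output pointer may be NULL. */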
void mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages,
	unsigned long *pined_purg_pages)
{
	if (unlikely(!mm))
		return;

	if (!total_purg_pages && !pined_purg_pages)
		return;

	__mm_purg_pages_info(mm, total_purg_pages, pined_purg_pages);
}

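/*
 * System-wide purgeable page counters: iterate all processes under RCU,
 * take a stable mm via find_lock_task_mm(), and accumulate the per-mm
 * counts. Per-task and summary figures are also logged.
 */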
void purg_pages_info(unsigned long *total_purg_pages, unsigned long *pined_purg_pages)
{
	struct task_struct *p = NULL;
	struct task_struct *tsk = NULL;
	unsigned long mm_nr_purge = 0, mm_nr_pined = 0;
	unsigned long nr_total = 0, nr_pined = 0;

	if (!total_purg_pages && !pined_purg_pages)
		return;

	if (total_purg_pages)
		*total_purg_pages = 0;

	if (pined_purg_pages)
		*pined_purg_pages = 0;

	rcu_read_lock();
	for_each_process(p) {
		tsk = find_lock_task_mm(p);
		if (!tsk) {
			/*
			 * It is a kthread or all of p's threads have already
			 * detached their mm.
			 */
			continue;
		}
		__mm_purg_pages_info(tsk->mm, &mm_nr_purge, &mm_nr_pined);
		nr_total += mm_nr_purge;
		nr_pined += mm_nr_pined;
		task_unlock(tsk);

		if (mm_nr_purge > 0) {
			pr_info("purgemm: tsk: %s %lu pinned in %lu pages\n", tsk->comm ?: "NULL",
				mm_nr_pined, mm_nr_purge);
		}
	}
	rcu_read_unlock();
	if (total_purg_pages)
		*total_purg_pages = nr_total;

	if (pined_purg_pages)
		*pined_purg_pages = nr_pined;
	pr_info("purgemm: Sum: %lu pinned in %lu pages\n", nr_pined, nr_total);
}