1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26 /*
27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2012, Intel Corporation.
31 */
32 /*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * Client Lustre Page.
37 *
38 * Author: Nikita Danilov <nikita.danilov@sun.com>
39 */
40
41 #define DEBUG_SUBSYSTEM S_CLASS
42
43 #include "../../include/linux/libcfs/libcfs.h"
44 #include "../include/obd_class.h"
45 #include "../include/obd_support.h"
46 #include <linux/list.h>
47
48 #include "../include/cl_object.h"
49 #include "cl_internal.h"
50
51 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
52 int radix);
53
54 # define PASSERT(env, page, expr) \
55 do { \
56 if (unlikely(!(expr))) { \
57 CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n"); \
58 LASSERT(0); \
59 } \
60 } while (0)
61
62 # define PINVRNT(env, page, exp) \
63 ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
64
65 /**
66 * Internal version of cl_page_top(); it can be called only when the page is
67 * known not to be freed, e.g., when the page is referenced, the radix-tree
68 * lock is held, or the page is owned.
69 */
70 static struct cl_page *cl_page_top_trusted(struct cl_page *page)
71 {
72 while (page->cp_parent != NULL)
73 page = page->cp_parent;
74 return page;
75 }
76
77 /**
78 * Internal version of cl_page_get().
79 *
80 * This function can be used to obtain an initial reference to a previously
81 * unreferenced cached page. It can be called only if concurrent page
82 * reclamation is somehow prevented, e.g., by locking the page radix-tree
83 * (cl_object_header::coh_page_guard), or by holding a lock on the VM page
84 * associated with \a page.
85 *
86 * Use with care! Not exported.
87 */
88 static void cl_page_get_trust(struct cl_page *page)
89 {
90 LASSERT(atomic_read(&page->cp_ref) > 0);
91 atomic_inc(&page->cp_ref);
92 }
93
94 /**
95 * Returns a slice within a page, corresponding to the given layer in the
96 * device stack.
97 *
98 * \see cl_lock_at()
99 */
100 static const struct cl_page_slice *
101 cl_page_at_trusted(const struct cl_page *page,
102 const struct lu_device_type *dtype)
103 {
104 const struct cl_page_slice *slice;
105
106 page = cl_page_top_trusted((struct cl_page *)page);
107 do {
108 list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
109 if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
110 return slice;
111 }
112 page = page->cp_child;
113 } while (page != NULL);
114 return NULL;
115 }
116
117 /**
118 * Returns the page with the given index in the given object, or NULL if no
119 * page is found. Acquires a reference on the returned page.
120 *
121 * Locking: called under cl_object_header::coh_page_guard spin-lock.
122 */
123 struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index)
124 {
125 struct cl_page *page;
126
127 assert_spin_locked(&hdr->coh_page_guard);
128
129 page = radix_tree_lookup(&hdr->coh_tree, index);
130 if (page != NULL)
131 cl_page_get_trust(page);
132 return page;
133 }
134 EXPORT_SYMBOL(cl_page_lookup);
135
136 /**
137 * Calls \a cb on each page of \a obj with an index in the [start, end] range.
138 *
139 * If the lookup would hog the CPU for too long, CLP_GANG_RESCHED is
140 * returned; in that case the caller should reschedule and implement a
141 * retry logic.
142 *
143 * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
144 * crucial in the face of [offset, EOF] locks.
145 *
146 * At least one page is processed unless the range contains no covered page.
147 */
148 int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
149 struct cl_io *io, pgoff_t start, pgoff_t end,
150 cl_page_gang_cb_t cb, void *cbdata)
151 {
152 struct cl_object_header *hdr;
153 struct cl_page *page;
154 struct cl_page **pvec;
155 const struct cl_page_slice *slice;
156 const struct lu_device_type *dtype;
157 pgoff_t idx;
158 unsigned int nr;
159 unsigned int i;
160 unsigned int j;
161 int res = CLP_GANG_OKAY;
162 int tree_lock = 1;
163
164 idx = start;
165 hdr = cl_object_header(obj);
166 pvec = cl_env_info(env)->clt_pvec;
167 dtype = cl_object_top(obj)->co_lu.lo_dev->ld_type;
168 spin_lock(&hdr->coh_page_guard);
169 while ((nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec,
170 idx, CLT_PVEC_SIZE)) > 0) {
171 int end_of_region = 0;
172
173 idx = pvec[nr - 1]->cp_index + 1;
174 for (i = 0, j = 0; i < nr; ++i) {
175 page = pvec[i];
176 pvec[i] = NULL;
177
178 LASSERT(page->cp_type == CPT_CACHEABLE);
179 if (page->cp_index > end) {
180 end_of_region = 1;
181 break;
182 }
183 if (page->cp_state == CPS_FREEING)
184 continue;
185
186 slice = cl_page_at_trusted(page, dtype);
187 /*
188 * Pages of an lsm-less file have no underlying sub-page
189 * for osc, in case of ...
190 */
191 PASSERT(env, page, slice != NULL);
192
193 page = slice->cpl_page;
194 /*
195 * Can safely call cl_page_get_trust() under
196 * radix-tree spin-lock.
197 *
198 * XXX not true, because @page belongs to an object other
199 * than the one @hdr describes and is protected by a different tree lock.
200 */
201 cl_page_get_trust(page);
202 lu_ref_add_atomic(&page->cp_reference,
203 "gang_lookup", current);
204 pvec[j++] = page;
205 }
206
207 /*
208 * Here a delicate locking dance is performed. The current thread
209 * holds a reference to a page, but has to own it before it
210 * can be placed into the queue. Owning implies waiting, so the
211 * radix-tree lock has to be released. After the wait one has to
212 * check that the pages weren't truncated (cl_page_own() returns
213 * an error in that case).
214 */
215 spin_unlock(&hdr->coh_page_guard);
216 tree_lock = 0;
217
218 for (i = 0; i < j; ++i) {
219 page = pvec[i];
220 if (res == CLP_GANG_OKAY)
221 res = (*cb)(env, io, page, cbdata);
222 lu_ref_del(&page->cp_reference,
223 "gang_lookup", current);
224 cl_page_put(env, page);
225 }
226 if (nr < CLT_PVEC_SIZE || end_of_region)
227 break;
228
229 if (res == CLP_GANG_OKAY && need_resched())
230 res = CLP_GANG_RESCHED;
231 if (res != CLP_GANG_OKAY)
232 break;
233
234 spin_lock(&hdr->coh_page_guard);
235 tree_lock = 1;
236 }
237 if (tree_lock)
238 spin_unlock(&hdr->coh_page_guard);
239 return res;
240 }
241 EXPORT_SYMBOL(cl_page_gang_lookup);
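
/*
 * Usage sketch (illustration only): a caller of cl_page_gang_lookup() is
 * expected to retry on CLP_GANG_RESCHED after yielding the CPU, as
 * cl_pages_prune() below does:
 *
 *	do {
 *		rc = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
 *					 my_page_cb, NULL);
 *		if (rc == CLP_GANG_RESCHED)
 *			cond_resched();
 *	} while (rc != CLP_GANG_OKAY);
 *
 * where my_page_cb is a hypothetical cl_page_gang_cb_t callback.
 */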
242
243 static void cl_page_free(const struct lu_env *env, struct cl_page *page)
244 {
245 struct cl_object *obj = page->cp_obj;
246
247 PASSERT(env, page, list_empty(&page->cp_batch));
248 PASSERT(env, page, page->cp_owner == NULL);
249 PASSERT(env, page, page->cp_req == NULL);
250 PASSERT(env, page, page->cp_parent == NULL);
251 PASSERT(env, page, page->cp_state == CPS_FREEING);
252
253 might_sleep();
254 while (!list_empty(&page->cp_layers)) {
255 struct cl_page_slice *slice;
256
257 slice = list_entry(page->cp_layers.next,
258 struct cl_page_slice, cpl_linkage);
259 list_del_init(page->cp_layers.next);
260 slice->cpl_ops->cpo_fini(env, slice);
261 }
262 lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
263 cl_object_put(env, obj);
264 lu_ref_fini(&page->cp_reference);
265 kfree(page);
266 }
267
268 /**
269 * Helper function updating page state. This is the only place in the code
270 * where cl_page::cp_state field is mutated.
271 */
272 static inline void cl_page_state_set_trust(struct cl_page *page,
273 enum cl_page_state state)
274 {
275 /* bypass const. */
276 *(enum cl_page_state *)&page->cp_state = state;
277 }
278
279 static struct cl_page *cl_page_alloc(const struct lu_env *env,
280 struct cl_object *o, pgoff_t ind, struct page *vmpage,
281 enum cl_page_type type)
282 {
283 struct cl_page *page;
284 struct lu_object_header *head;
285
286 page = kzalloc(cl_object_header(o)->coh_page_bufsize, GFP_NOFS);
287 if (page != NULL) {
288 int result = 0;
289
290 atomic_set(&page->cp_ref, 1);
291 if (type == CPT_CACHEABLE) /* for radix tree */
292 atomic_inc(&page->cp_ref);
293 page->cp_obj = o;
294 cl_object_get(o);
295 lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
296 page);
297 page->cp_index = ind;
298 cl_page_state_set_trust(page, CPS_CACHED);
299 page->cp_type = type;
300 INIT_LIST_HEAD(&page->cp_layers);
301 INIT_LIST_HEAD(&page->cp_batch);
302 INIT_LIST_HEAD(&page->cp_flight);
303 mutex_init(&page->cp_mutex);
304 lu_ref_init(&page->cp_reference);
305 head = o->co_lu.lo_header;
306 list_for_each_entry(o, &head->loh_layers,
307 co_lu.lo_linkage) {
308 if (o->co_ops->coo_page_init != NULL) {
309 result = o->co_ops->coo_page_init(env, o,
310 page, vmpage);
311 if (result != 0) {
312 cl_page_delete0(env, page, 0);
313 cl_page_free(env, page);
314 page = ERR_PTR(result);
315 break;
316 }
317 }
318 }
319 } else {
320 page = ERR_PTR(-ENOMEM);
321 }
322 return page;
323 }
324
325 /**
326 * Returns a cl_page with index \a idx at the object \a o, and associated with
327 * the VM page \a vmpage.
328 *
329 * This is the main entry point into the cl_page caching interface. First, a
330 * cache (implemented as a per-object radix tree) is consulted. If the page is
331 * found there, it is returned immediately. Otherwise a new page is allocated
332 * and returned. In either case, an additional reference to the page is acquired.
333 *
334 * \see cl_object_find(), cl_lock_find()
335 */
336 static struct cl_page *cl_page_find0(const struct lu_env *env,
337 struct cl_object *o,
338 pgoff_t idx, struct page *vmpage,
339 enum cl_page_type type,
340 struct cl_page *parent)
341 {
342 struct cl_page *page = NULL;
343 struct cl_page *ghost = NULL;
344 struct cl_object_header *hdr;
345 int err;
346
347 LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
348 might_sleep();
349
350 hdr = cl_object_header(o);
351
352 CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
353 idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
354 /* fast path. */
355 if (type == CPT_CACHEABLE) {
356 /*
357 * vmpage lock is used to protect the child/parent
358 * relationship
359 */
360 KLASSERT(PageLocked(vmpage));
361 /*
362 * cl_vmpage_page() can be called here without any locks as
363 *
364 * - "vmpage" is locked (which prevents ->private from
365 * concurrent updates), and
366 *
367 * - "o" cannot be destroyed while current thread holds a
368 * reference on it.
369 */
370 page = cl_vmpage_page(vmpage, o);
371 PINVRNT(env, page,
372 ergo(page != NULL,
373 cl_page_vmpage(env, page) == vmpage &&
374 (void *)radix_tree_lookup(&hdr->coh_tree,
375 idx) == page));
376 }
377
378 if (page != NULL)
379 return page;
380
381 /* allocate and initialize cl_page */
382 page = cl_page_alloc(env, o, idx, vmpage, type);
383 if (IS_ERR(page))
384 return page;
385
386 if (type == CPT_TRANSIENT) {
387 if (parent) {
388 LASSERT(page->cp_parent == NULL);
389 page->cp_parent = parent;
390 parent->cp_child = page;
391 }
392 return page;
393 }
394
395 /*
396 * XXX optimization: use radix_tree_preload() here, and change tree
397 * gfp mask to GFP_KERNEL in cl_object_header_init().
398 */
399 spin_lock(&hdr->coh_page_guard);
400 err = radix_tree_insert(&hdr->coh_tree, idx, page);
401 if (err != 0) {
402 ghost = page;
403 /*
404 * Noted by Jay: a lock on \a vmpage protects cl_page_find()
405 * from this race, but
406 *
407 * 0. it's better to have cl_page interface "locally
408 * consistent" so that its correctness can be reasoned
409 * about without appealing to the (obscure world of) VM
410 * locking.
411 *
412 * 1. handling this race allows ->coh_tree to remain
413 * consistent even when VM locking is somehow busted,
414 * which is very useful during diagnosing and debugging.
415 */
416 page = ERR_PTR(err);
417 CL_PAGE_DEBUG(D_ERROR, env, ghost,
418 "fail to insert into radix tree: %d\n", err);
419 } else {
420 if (parent) {
421 LASSERT(page->cp_parent == NULL);
422 page->cp_parent = parent;
423 parent->cp_child = page;
424 }
425 hdr->coh_pages++;
426 }
427 spin_unlock(&hdr->coh_page_guard);
428
429 if (unlikely(ghost != NULL)) {
430 cl_page_delete0(env, ghost, 0);
431 cl_page_free(env, ghost);
432 }
433 return page;
434 }
435
436 struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *o,
437 pgoff_t idx, struct page *vmpage,
438 enum cl_page_type type)
439 {
440 return cl_page_find0(env, o, idx, vmpage, type, NULL);
441 }
442 EXPORT_SYMBOL(cl_page_find);
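
/*
 * Usage sketch (hypothetical caller, for illustration only): with the VM
 * page locked, look up or create the matching cl_page, own it for the io,
 * and drop the reference when done; "env", "io", "obj" and "vmpage" are
 * assumed to come from the caller.
 *
 *	struct cl_page *page;
 *
 *	page = cl_page_find(env, obj, vmpage->index, vmpage, CPT_CACHEABLE);
 *	if (!IS_ERR(page)) {
 *		if (cl_page_own(env, io, page) == 0) {
 *			... use the owned page ...
 *			cl_page_disown(env, io, page);
 *		}
 *		cl_page_put(env, page);
 *	}
 */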
443
444 struct cl_page *cl_page_find_sub(const struct lu_env *env, struct cl_object *o,
445 pgoff_t idx, struct page *vmpage,
446 struct cl_page *parent)
447 {
448 return cl_page_find0(env, o, idx, vmpage, parent->cp_type, parent);
449 }
450 EXPORT_SYMBOL(cl_page_find_sub);
451
452 static inline int cl_page_invariant(const struct cl_page *pg)
453 {
454 struct cl_object_header *header;
455 struct cl_page *parent;
456 struct cl_page *child;
457 struct cl_io *owner;
458
459 /*
460 * Page invariant is protected by a VM lock.
461 */
462 LINVRNT(cl_page_is_vmlocked(NULL, pg));
463
464 header = cl_object_header(pg->cp_obj);
465 parent = pg->cp_parent;
466 child = pg->cp_child;
467 owner = pg->cp_owner;
468
469 return cl_page_in_use(pg) &&
470 ergo(parent != NULL, parent->cp_child == pg) &&
471 ergo(child != NULL, child->cp_parent == pg) &&
472 ergo(child != NULL, pg->cp_obj != child->cp_obj) &&
473 ergo(parent != NULL, pg->cp_obj != parent->cp_obj) &&
474 ergo(owner != NULL && parent != NULL,
475 parent->cp_owner == pg->cp_owner->ci_parent) &&
476 ergo(owner != NULL && child != NULL,
477 child->cp_owner->ci_parent == owner) &&
478 /*
479 * Either page is early in initialization (has neither child
480 * nor parent yet), or it is in the object radix tree.
481 */
482 ergo(pg->cp_state < CPS_FREEING && pg->cp_type == CPT_CACHEABLE,
483 (void *)radix_tree_lookup(&header->coh_tree,
484 pg->cp_index) == pg ||
485 (child == NULL && parent == NULL));
486 }
487
488 static void cl_page_state_set0(const struct lu_env *env,
489 struct cl_page *page, enum cl_page_state state)
490 {
491 enum cl_page_state old;
492
493 /*
494 * Matrix of allowed state transitions [old][new], for sanity
495 * checking.
496 */
497 static const int allowed_transitions[CPS_NR][CPS_NR] = {
498 [CPS_CACHED] = {
499 [CPS_CACHED] = 0,
500 [CPS_OWNED] = 1, /* io finds existing cached page */
501 [CPS_PAGEIN] = 0,
502 [CPS_PAGEOUT] = 1, /* write-out from the cache */
503 [CPS_FREEING] = 1, /* eviction on the memory pressure */
504 },
505 [CPS_OWNED] = {
506 [CPS_CACHED] = 1, /* release to the cache */
507 [CPS_OWNED] = 0,
508 [CPS_PAGEIN] = 1, /* start read immediately */
509 [CPS_PAGEOUT] = 1, /* start write immediately */
510 [CPS_FREEING] = 1, /* lock invalidation or truncate */
511 },
512 [CPS_PAGEIN] = {
513 [CPS_CACHED] = 1, /* io completion */
514 [CPS_OWNED] = 0,
515 [CPS_PAGEIN] = 0,
516 [CPS_PAGEOUT] = 0,
517 [CPS_FREEING] = 0,
518 },
519 [CPS_PAGEOUT] = {
520 [CPS_CACHED] = 1, /* io completion */
521 [CPS_OWNED] = 0,
522 [CPS_PAGEIN] = 0,
523 [CPS_PAGEOUT] = 0,
524 [CPS_FREEING] = 0,
525 },
526 [CPS_FREEING] = {
527 [CPS_CACHED] = 0,
528 [CPS_OWNED] = 0,
529 [CPS_PAGEIN] = 0,
530 [CPS_PAGEOUT] = 0,
531 [CPS_FREEING] = 0,
532 }
533 };
534
535 old = page->cp_state;
536 PASSERT(env, page, allowed_transitions[old][state]);
537 CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
538 for (; page != NULL; page = page->cp_child) {
539 PASSERT(env, page, page->cp_state == old);
540 PASSERT(env, page,
541 equi(state == CPS_OWNED, page->cp_owner != NULL));
542
543 cl_page_state_set_trust(page, state);
544 }
545 }
546
547 static void cl_page_state_set(const struct lu_env *env,
548 struct cl_page *page, enum cl_page_state state)
549 {
550 cl_page_state_set0(env, page, state);
551 }
552
553 /**
554 * Acquires an additional reference to a page.
555 *
556 * This can be called only by a caller already possessing a reference to
557 * \a page.
558 *
559 * \see cl_object_get(), cl_lock_get().
560 */
561 void cl_page_get(struct cl_page *page)
562 {
563 cl_page_get_trust(page);
564 }
565 EXPORT_SYMBOL(cl_page_get);
566
567 /**
568 * Releases a reference to a page.
569 *
570 * When the last reference is released, the page is returned to the cache,
571 * unless it is in cl_page_state::CPS_FREEING state, in which case it is
572 * immediately destroyed.
573 *
574 * \see cl_object_put(), cl_lock_put().
575 */
576 void cl_page_put(const struct lu_env *env, struct cl_page *page)
577 {
578 PASSERT(env, page, atomic_read(&page->cp_ref) > !!page->cp_parent);
579
580 CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
581 atomic_read(&page->cp_ref));
582
583 if (atomic_dec_and_test(&page->cp_ref)) {
584 LASSERT(page->cp_state == CPS_FREEING);
585
586 LASSERT(atomic_read(&page->cp_ref) == 0);
587 PASSERT(env, page, page->cp_owner == NULL);
588 PASSERT(env, page, list_empty(&page->cp_batch));
589 /*
590 * Page is no longer reachable by other threads. Tear
591 * it down.
592 */
593 cl_page_free(env, page);
594 }
595 }
596 EXPORT_SYMBOL(cl_page_put);
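
/*
 * Reference counting sketch (illustration only): every cl_page_get() must be
 * balanced by a cl_page_put(); cl_page_find() and cl_page_lookup() also
 * return a referenced page that the caller eventually has to put:
 *
 *	cl_page_get(page);
 *	... hand the page over to another context ...
 *	cl_page_put(env, page);
 */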
597
598 /**
599 * Returns a VM page associated with a given cl_page.
600 */
601 struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page)
602 {
603 const struct cl_page_slice *slice;
604
605 /*
606 * Find uppermost layer with ->cpo_vmpage() method, and return its
607 * result.
608 */
609 page = cl_page_top(page);
610 do {
611 list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
612 if (slice->cpl_ops->cpo_vmpage != NULL)
613 return slice->cpl_ops->cpo_vmpage(env, slice);
614 }
615 page = page->cp_child;
616 } while (page != NULL);
617 LBUG(); /* ->cpo_vmpage() has to be defined somewhere in the stack */
618 }
619 EXPORT_SYMBOL(cl_page_vmpage);
620
621 /**
622 * Returns a cl_page associated with a VM page, and given cl_object.
623 */
624 struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
625 {
626 struct cl_page *top;
627 struct cl_page *page;
628
629 KLASSERT(PageLocked(vmpage));
630
631 /*
632 * NOTE: absence of races and liveness of data are guaranteed by page
633 * lock on a "vmpage". That works because object destruction has
634 * bottom-to-top pass.
635 */
636
637 /*
638 * This loop assumes that ->private points to the top-most page. This
639 * can be rectified easily.
640 */
641 top = (struct cl_page *)vmpage->private;
642 if (top == NULL)
643 return NULL;
644
645 for (page = top; page != NULL; page = page->cp_child) {
646 if (cl_object_same(page->cp_obj, obj)) {
647 cl_page_get_trust(page);
648 break;
649 }
650 }
651 LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
652 return page;
653 }
654 EXPORT_SYMBOL(cl_vmpage_page);
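
/*
 * Usage sketch (illustration only): translate a locked VM page into the
 * cl_page known to object "obj" and drop the reference taken by
 * cl_vmpage_page() when done:
 *
 *	KLASSERT(PageLocked(vmpage));
 *	page = cl_vmpage_page(vmpage, obj);
 *	if (page != NULL) {
 *		... inspect page->cp_state, page->cp_index, ...
 *		cl_page_put(env, page);
 *	}
 */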
655
656 /**
657 * Returns the top-page for a given page.
658 *
659 * \see cl_object_top(), cl_io_top()
660 */
661 struct cl_page *cl_page_top(struct cl_page *page)
662 {
663 return cl_page_top_trusted(page);
664 }
665 EXPORT_SYMBOL(cl_page_top);
666
667 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
668 const struct lu_device_type *dtype)
669 {
670 return cl_page_at_trusted(page, dtype);
671 }
672 EXPORT_SYMBOL(cl_page_at);
673
674 #define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)
675
676 #define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...) \
677 ({ \
678 const struct lu_env *__env = (_env); \
679 struct cl_page *__page = (_page); \
680 const struct cl_page_slice *__scan; \
681 int __result; \
682 ptrdiff_t __op = (_op); \
683 int (*__method)_proto; \
684 \
685 __result = 0; \
686 __page = cl_page_top(__page); \
687 do { \
688 list_for_each_entry(__scan, &__page->cp_layers, \
689 cpl_linkage) { \
690 __method = *(void **)((char *)__scan->cpl_ops + \
691 __op); \
692 if (__method != NULL) { \
693 __result = (*__method)(__env, __scan, \
694 ## __VA_ARGS__); \
695 if (__result != 0) \
696 break; \
697 } \
698 } \
699 __page = __page->cp_child; \
700 } while (__page != NULL && __result == 0); \
701 if (__result > 0) \
702 __result = 0; \
703 __result; \
704 })
705
706 #define CL_PAGE_INVOID(_env, _page, _op, _proto, ...) \
707 do { \
708 const struct lu_env *__env = (_env); \
709 struct cl_page *__page = (_page); \
710 const struct cl_page_slice *__scan; \
711 ptrdiff_t __op = (_op); \
712 void (*__method)_proto; \
713 \
714 __page = cl_page_top(__page); \
715 do { \
716 list_for_each_entry(__scan, &__page->cp_layers, \
717 cpl_linkage) { \
718 __method = *(void **)((char *)__scan->cpl_ops + \
719 __op); \
720 if (__method != NULL) \
721 (*__method)(__env, __scan, \
722 ## __VA_ARGS__); \
723 } \
724 __page = __page->cp_child; \
725 } while (__page != NULL); \
726 } while (0)
727
728 #define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...) \
729 do { \
730 const struct lu_env *__env = (_env); \
731 struct cl_page *__page = (_page); \
732 const struct cl_page_slice *__scan; \
733 ptrdiff_t __op = (_op); \
734 void (*__method)_proto; \
735 \
736 /* get to the bottom page. */ \
737 while (__page->cp_child != NULL) \
738 __page = __page->cp_child; \
739 do { \
740 list_for_each_entry_reverse(__scan, &__page->cp_layers, \
741 cpl_linkage) { \
742 __method = *(void **)((char *)__scan->cpl_ops + \
743 __op); \
744 if (__method != NULL) \
745 (*__method)(__env, __scan, \
746 ## __VA_ARGS__); \
747 } \
748 __page = __page->cp_parent; \
749 } while (__page != NULL); \
750 } while (0)
751
752 static int cl_page_invoke(const struct lu_env *env,
753 struct cl_io *io, struct cl_page *page, ptrdiff_t op)
754
755 {
756 PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
757 return CL_PAGE_INVOKE(env, page, op,
758 (const struct lu_env *,
759 const struct cl_page_slice *, struct cl_io *),
760 io);
761 }
762
763 static void cl_page_invoid(const struct lu_env *env,
764 struct cl_io *io, struct cl_page *page, ptrdiff_t op)
765
766 {
767 PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
768 CL_PAGE_INVOID(env, page, op,
769 (const struct lu_env *,
770 const struct cl_page_slice *, struct cl_io *), io);
771 }
772
773 static void cl_page_owner_clear(struct cl_page *page)
774 {
775 for (page = cl_page_top(page); page != NULL; page = page->cp_child) {
776 if (page->cp_owner != NULL) {
777 LASSERT(page->cp_owner->ci_owned_nr > 0);
778 page->cp_owner->ci_owned_nr--;
779 page->cp_owner = NULL;
780 page->cp_task = NULL;
781 }
782 }
783 }
784
785 static void cl_page_owner_set(struct cl_page *page)
786 {
787 for (page = cl_page_top(page); page != NULL; page = page->cp_child) {
788 LASSERT(page->cp_owner != NULL);
789 page->cp_owner->ci_owned_nr++;
790 }
791 }
792
793 void cl_page_disown0(const struct lu_env *env,
794 struct cl_io *io, struct cl_page *pg)
795 {
796 enum cl_page_state state;
797
798 state = pg->cp_state;
799 PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
800 PINVRNT(env, pg, cl_page_invariant(pg));
801 cl_page_owner_clear(pg);
802
803 if (state == CPS_OWNED)
804 cl_page_state_set(env, pg, CPS_CACHED);
805 /*
806 * Completion call-backs are executed in the bottom-up order, so that
807 * uppermost layer (llite), responsible for VFS/VM interaction runs
808 * last and can release locks safely.
809 */
810 CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
811 (const struct lu_env *,
812 const struct cl_page_slice *, struct cl_io *),
813 io);
814 }
815
816 /**
817 * Returns true iff the page is owned by the given io.
818 */
819 int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
820 {
821 LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
822 return pg->cp_state == CPS_OWNED && pg->cp_owner == io;
823 }
824 EXPORT_SYMBOL(cl_page_is_owned);
825
826 /**
827 * Try to own a page by IO.
828 *
829 * Waits until the page is in cl_page_state::CPS_CACHED state, and then
830 * switches it into cl_page_state::CPS_OWNED state.
831 *
832 * \pre !cl_page_is_owned(pg, io)
833 * \post result == 0 iff cl_page_is_owned(pg, io)
834 *
835 * \retval 0 success
836 *
837 * \retval -ve failure, e.g., the page was destroyed (and landed in
838 * cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
839 * or the page was owned by another thread, or is in IO.
840 *
841 * \see cl_page_disown()
842 * \see cl_page_operations::cpo_own()
843 * \see cl_page_own_try()
844 * \see cl_page_own
845 */
846 static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
847 struct cl_page *pg, int nonblock)
848 {
849 int result;
850
851 PINVRNT(env, pg, !cl_page_is_owned(pg, io));
852
853 pg = cl_page_top(pg);
854 io = cl_io_top(io);
855
856 if (pg->cp_state == CPS_FREEING) {
857 result = -ENOENT;
858 } else {
859 result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
860 (const struct lu_env *,
861 const struct cl_page_slice *,
862 struct cl_io *, int),
863 io, nonblock);
864 if (result == 0) {
865 PASSERT(env, pg, pg->cp_owner == NULL);
866 PASSERT(env, pg, pg->cp_req == NULL);
867 pg->cp_owner = io;
868 pg->cp_task = current;
869 cl_page_owner_set(pg);
870 if (pg->cp_state != CPS_FREEING) {
871 cl_page_state_set(env, pg, CPS_OWNED);
872 } else {
873 cl_page_disown0(env, io, pg);
874 result = -ENOENT;
875 }
876 }
877 }
878 PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
879 return result;
880 }
881
882 /**
883 * Own a page; the call may block.
884 *
885 * \see cl_page_own0()
886 */
887 int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
888 {
889 return cl_page_own0(env, io, pg, 0);
890 }
891 EXPORT_SYMBOL(cl_page_own);
892
893 /**
894 * Nonblock version of cl_page_own().
895 *
896 * \see cl_page_own0()
897 */
898 int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
899 struct cl_page *pg)
900 {
901 return cl_page_own0(env, io, pg, 1);
902 }
903 EXPORT_SYMBOL(cl_page_own_try);
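
/*
 * Ownership sketch (illustration only): when blocking is not acceptable,
 * cl_page_own_try() can be used and the page skipped on failure; otherwise
 * cl_page_own() waits for the current owner:
 *
 *	if (cl_page_own_try(env, io, pg) == 0) {
 *		... "io" now owns "pg" ...
 *		cl_page_disown(env, io, pg);
 *	} else {
 *		... the page is busy or being freed; skip it ...
 *	}
 */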
904
905 /**
906 * Assume page ownership.
907 *
908 * Called when page is already locked by the hosting VM.
909 *
910 * \pre !cl_page_is_owned(pg, io)
911 * \post cl_page_is_owned(pg, io)
912 *
913 * \see cl_page_operations::cpo_assume()
914 */
915 void cl_page_assume(const struct lu_env *env,
916 struct cl_io *io, struct cl_page *pg)
917 {
918 PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
919
920 pg = cl_page_top(pg);
921 io = cl_io_top(io);
922
923 cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
924 PASSERT(env, pg, pg->cp_owner == NULL);
925 pg->cp_owner = io;
926 pg->cp_task = current;
927 cl_page_owner_set(pg);
928 cl_page_state_set(env, pg, CPS_OWNED);
929 }
930 EXPORT_SYMBOL(cl_page_assume);
931
932 /**
933 * Releases page ownership without unlocking the page.
934 *
935 * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
936 * underlying VM page (as VM is supposed to do this itself).
937 *
938 * \pre cl_page_is_owned(pg, io)
939 * \post !cl_page_is_owned(pg, io)
940 *
941 * \see cl_page_assume()
942 */
943 void cl_page_unassume(const struct lu_env *env,
944 struct cl_io *io, struct cl_page *pg)
945 {
946 PINVRNT(env, pg, cl_page_is_owned(pg, io));
947 PINVRNT(env, pg, cl_page_invariant(pg));
948
949 pg = cl_page_top(pg);
950 io = cl_io_top(io);
951 cl_page_owner_clear(pg);
952 cl_page_state_set(env, pg, CPS_CACHED);
953 CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
954 (const struct lu_env *,
955 const struct cl_page_slice *, struct cl_io *),
956 io);
957 }
958 EXPORT_SYMBOL(cl_page_unassume);
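
/*
 * Assume/unassume sketch (illustration only): unlike cl_page_own(), these
 * calls are made while the hosting VM page is already locked by the caller;
 * "vmpage" is assumed to be the VM page backing "pg":
 *
 *	lock_page(vmpage);
 *	cl_page_assume(env, io, pg);
 *	... operate on the owned page ...
 *	cl_page_unassume(env, io, pg);
 *	unlock_page(vmpage);
 */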
959
960 /**
961 * Releases page ownership.
962 *
963 * Moves page into cl_page_state::CPS_CACHED.
964 *
965 * \pre cl_page_is_owned(pg, io)
966 * \post !cl_page_is_owned(pg, io)
967 *
968 * \see cl_page_own()
969 * \see cl_page_operations::cpo_disown()
970 */
971 void cl_page_disown(const struct lu_env *env,
972 struct cl_io *io, struct cl_page *pg)
973 {
974 PINVRNT(env, pg, cl_page_is_owned(pg, io));
975
976 pg = cl_page_top(pg);
977 io = cl_io_top(io);
978 cl_page_disown0(env, io, pg);
979 }
980 EXPORT_SYMBOL(cl_page_disown);
981
982 /**
983 * Called when page is to be removed from the object, e.g., as a result of
984 * truncate.
985 *
986 * Calls cl_page_operations::cpo_discard() top-to-bottom.
987 *
988 * \pre cl_page_is_owned(pg, io)
989 *
990 * \see cl_page_operations::cpo_discard()
991 */
992 void cl_page_discard(const struct lu_env *env,
993 struct cl_io *io, struct cl_page *pg)
994 {
995 PINVRNT(env, pg, cl_page_is_owned(pg, io));
996 PINVRNT(env, pg, cl_page_invariant(pg));
997
998 cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
999 }
1000 EXPORT_SYMBOL(cl_page_discard);
1001
1002 /**
1003 * Version of cl_page_delete() that can be called for not fully constructed
1004 * pages, e.g., in an error-handling cl_page_find()->cl_page_delete0()
1005 * path. Doesn't check page invariant.
1006 */
1007 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
1008 int radix)
1009 {
1010 struct cl_page *tmp = pg;
1011
1012 PASSERT(env, pg, pg == cl_page_top(pg));
1013 PASSERT(env, pg, pg->cp_state != CPS_FREEING);
1014
1015 /*
1016 * Sever all ways to obtain new pointers to @pg.
1017 */
1018 cl_page_owner_clear(pg);
1019
1020 /*
1021 * Unexport the page before freeing it, so that
1022 * its content is considered invalid.
1023 * We have to do this because a CPS_FREEING cl_page may
1024 * NOT be under the protection of a cl_lock.
1025 * Afterwards, if this page is found by other threads, it
1026 * will be forced to be reread.
1027 */
1028 cl_page_export(env, pg, 0);
1029 cl_page_state_set0(env, pg, CPS_FREEING);
1030
1031 CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),
1032 (const struct lu_env *, const struct cl_page_slice *));
1033
1034 if (tmp->cp_type == CPT_CACHEABLE) {
1035 if (!radix)
1036 /* !radix means that @pg is not yet in the radix tree,
1037 * skip removing it.
1038 */
1039 tmp = pg->cp_child;
1040 for (; tmp != NULL; tmp = tmp->cp_child) {
1041 void *value;
1042 struct cl_object_header *hdr;
1043
1044 hdr = cl_object_header(tmp->cp_obj);
1045 spin_lock(&hdr->coh_page_guard);
1046 value = radix_tree_delete(&hdr->coh_tree,
1047 tmp->cp_index);
1048 PASSERT(env, tmp, value == tmp);
1049 PASSERT(env, tmp, hdr->coh_pages > 0);
1050 hdr->coh_pages--;
1051 spin_unlock(&hdr->coh_page_guard);
1052 cl_page_put(env, tmp);
1053 }
1054 }
1055 }
1056
1057 /**
1058 * Called when a decision is made to throw page out of memory.
1059 *
1060 * Notifies all layers about page destruction by calling
1061 * cl_page_operations::cpo_delete() method top-to-bottom.
1062 *
1063 * Moves page into cl_page_state::CPS_FREEING state (this is the only place
1064 * where transition to this state happens).
1065 *
1066 * Eliminates all venues through which new references to the page can be
1067 * obtained:
1068 *
1069 * - removes page from the radix trees,
1070 *
1071 * - breaks linkage from VM page to cl_page.
1072 *
1073 * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
1074 * drain after some time, at which point page will be recycled.
1075 *
1076 * \pre pg == cl_page_top(pg)
1077 * \pre VM page is locked
1078 * \post pg->cp_state == CPS_FREEING
1079 *
1080 * \see cl_page_operations::cpo_delete()
1081 */
1082 void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
1083 {
1084 PINVRNT(env, pg, cl_page_invariant(pg));
1085 cl_page_delete0(env, pg, 1);
1086 }
1087 EXPORT_SYMBOL(cl_page_delete);
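
/*
 * Deletion sketch (illustration only): cl_page_delete() is called with the
 * VM page locked, e.g., from a truncate or page-invalidation path; the
 * caller still drops its own reference afterwards:
 *
 *	lock_page(vmpage);
 *	page = cl_vmpage_page(vmpage, obj);
 *	if (page != NULL) {
 *		cl_page_delete(env, cl_page_top(page));
 *		cl_page_put(env, page);
 *	}
 *	unlock_page(vmpage);
 */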
1088
1089 /**
1090 * Unmaps page from user virtual memory.
1091 *
1092 * Calls cl_page_operations::cpo_unmap() through all layers top-to-bottom. The
1093 * layer responsible for VM interaction has to unmap page from user space
1094 * virtual memory.
1095 *
1096 * \see cl_page_operations::cpo_unmap()
1097 */
1098 int cl_page_unmap(const struct lu_env *env,
1099 struct cl_io *io, struct cl_page *pg)
1100 {
1101 PINVRNT(env, pg, cl_page_is_owned(pg, io));
1102 PINVRNT(env, pg, cl_page_invariant(pg));
1103
1104 return cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_unmap));
1105 }
1106 EXPORT_SYMBOL(cl_page_unmap);
1107
1108 /**
1109 * Marks page up-to-date.
1110 *
1111 * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
1112 * layer responsible for VM interaction has to mark/clear page as up-to-date
1113 * by the \a uptodate argument.
1114 *
1115 * \see cl_page_operations::cpo_export()
1116 */
1117 void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
1118 {
1119 PINVRNT(env, pg, cl_page_invariant(pg));
1120 CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
1121 (const struct lu_env *,
1122 const struct cl_page_slice *, int), uptodate);
1123 }
1124 EXPORT_SYMBOL(cl_page_export);
1125
1126 /**
1127 * Returns true, iff \a pg is VM locked in a suitable sense by the calling
1128 * thread.
1129 */
1130 int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
1131 {
1132 int result;
1133 const struct cl_page_slice *slice;
1134
1135 pg = cl_page_top_trusted((struct cl_page *)pg);
1136 slice = container_of(pg->cp_layers.next,
1137 const struct cl_page_slice, cpl_linkage);
1138 PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked != NULL);
1139 /*
1140 * Call ->cpo_is_vmlocked() directly instead of going through
1141 * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
1142 * cl_page_invariant().
1143 */
1144 result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
1145 PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
1146 return result == -EBUSY;
1147 }
1148 EXPORT_SYMBOL(cl_page_is_vmlocked);
1149
1150 static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
1151 {
1152 return crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN;
1153 }
1154
1155 static void cl_page_io_start(const struct lu_env *env,
1156 struct cl_page *pg, enum cl_req_type crt)
1157 {
1158 /*
1159 * Page is queued for IO, change its state.
1160 */
1161 cl_page_owner_clear(pg);
1162 cl_page_state_set(env, pg, cl_req_type_state(crt));
1163 }
1164
1165 /**
1166 * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
1167 * called top-to-bottom. Every layer either agrees to submit this page (by
1168 * returning 0), or requests to omit this page (by returning -EALREADY). The
1169 * layer handling interactions with the VM also has to inform the VM that the
1170 * page is now under transfer.
1171 */
1172 int cl_page_prep(const struct lu_env *env, struct cl_io *io,
1173 struct cl_page *pg, enum cl_req_type crt)
1174 {
1175 int result;
1176
1177 PINVRNT(env, pg, cl_page_is_owned(pg, io));
1178 PINVRNT(env, pg, cl_page_invariant(pg));
1179 PINVRNT(env, pg, crt < CRT_NR);
1180
1181 /*
1182 * XXX this has to be called bottom-to-top, so that llite can set up
1183 * PG_writeback without risking other layers deciding to skip this
1184 * page.
1185 */
1186 if (crt >= CRT_NR)
1187 return -EINVAL;
1188 result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
1189 if (result == 0)
1190 cl_page_io_start(env, pg, crt);
1191
1192 CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
1193 return result;
1194 }
1195 EXPORT_SYMBOL(cl_page_prep);
1196
1197 /**
1198 * Notify layers about transfer completion.
1199 *
1200 * Invoked by transfer sub-system (which is a part of osc) to notify layers
1201 * that a transfer of which this page is a part has completed.
1202 *
1203 * Completion call-backs are executed in the bottom-up order, so that
1204 * uppermost layer (llite), responsible for the VFS/VM interaction runs last
1205 * and can release locks safely.
1206 *
1207 * \pre pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
1208 * \post pg->cp_state == CPS_CACHED
1209 *
1210 * \see cl_page_operations::cpo_completion()
1211 */
1212 void cl_page_completion(const struct lu_env *env,
1213 struct cl_page *pg, enum cl_req_type crt, int ioret)
1214 {
1215 struct cl_sync_io *anchor = pg->cp_sync_io;
1216
1217 PASSERT(env, pg, crt < CRT_NR);
1218 /* cl_page::cp_req already cleared by the caller (osc_completion()) */
1219 PASSERT(env, pg, pg->cp_req == NULL);
1220 PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
1221
1222 CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
1223 if (crt == CRT_READ && ioret == 0) {
1224 PASSERT(env, pg, !(pg->cp_flags & CPF_READ_COMPLETED));
1225 pg->cp_flags |= CPF_READ_COMPLETED;
1226 }
1227
1228 cl_page_state_set(env, pg, CPS_CACHED);
1229 if (crt >= CRT_NR)
1230 return;
1231 CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
1232 (const struct lu_env *,
1233 const struct cl_page_slice *, int), ioret);
1234 if (anchor) {
1235 LASSERT(cl_page_is_vmlocked(env, pg));
1236 LASSERT(pg->cp_sync_io == anchor);
1237 pg->cp_sync_io = NULL;
1238 }
1239 /*
1240 * As page->cp_obj is pinned by a reference from page->cp_req, it is
1241 * safe to call cl_page_put() without risking object destruction in a
1242 * non-blocking context.
1243 */
1244 cl_page_put(env, pg);
1245
1246 if (anchor)
1247 cl_sync_io_note(anchor, ioret);
1248 }
1249 EXPORT_SYMBOL(cl_page_completion);
1250
1251 /**
1252 * Notify layers that the transfer formation engine decided to yank this page from
1253 * the cache and to make it a part of a transfer.
1254 *
1255 * \pre pg->cp_state == CPS_CACHED
1256 * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
1257 *
1258 * \see cl_page_operations::cpo_make_ready()
1259 */
1260 int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
1261 enum cl_req_type crt)
1262 {
1263 int result;
1264
1265 PINVRNT(env, pg, crt < CRT_NR);
1266
1267 if (crt >= CRT_NR)
1268 return -EINVAL;
1269 result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
1270 (const struct lu_env *,
1271 const struct cl_page_slice *));
1272 if (result == 0) {
1273 PASSERT(env, pg, pg->cp_state == CPS_CACHED);
1274 cl_page_io_start(env, pg, crt);
1275 }
1276 CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
1277 return result;
1278 }
1279 EXPORT_SYMBOL(cl_page_make_ready);
1280
1281 /**
1282 * Notify layers that a high-level io decided to place this page into a cache
1283 * for future transfer.
1284 *
1285 * The layer implementing transfer engine (osc) has to register this page in
1286 * its queues.
1287 *
1288 * \pre cl_page_is_owned(pg, io)
1289 * \post cl_page_is_owned(pg, io)
1290 *
1291 * \see cl_page_operations::cpo_cache_add()
1292 */
1293 int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
1294 struct cl_page *pg, enum cl_req_type crt)
1295 {
1296 const struct cl_page_slice *scan;
1297 int result = 0;
1298
1299 PINVRNT(env, pg, crt < CRT_NR);
1300 PINVRNT(env, pg, cl_page_is_owned(pg, io));
1301 PINVRNT(env, pg, cl_page_invariant(pg));
1302
1303 if (crt >= CRT_NR)
1304 return -EINVAL;
1305
1306 list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
1307 if (scan->cpl_ops->io[crt].cpo_cache_add == NULL)
1308 continue;
1309
1310 result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io);
1311 if (result != 0)
1312 break;
1313 }
1314 CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
1315 return result;
1316 }
1317 EXPORT_SYMBOL(cl_page_cache_add);
1318
1319 /**
1320 * Called when a page is being written back at the kernel's request.
1321 *
1322 * \pre cl_page_is_owned(pg, io)
1323 * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
1324 *
1325 * \see cl_page_operations::cpo_flush()
1326 */
1327 int cl_page_flush(const struct lu_env *env, struct cl_io *io,
1328 struct cl_page *pg)
1329 {
1330 int result;
1331
1332 PINVRNT(env, pg, cl_page_is_owned(pg, io));
1333 PINVRNT(env, pg, cl_page_invariant(pg));
1334
1335 result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));
1336
1337 CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
1338 return result;
1339 }
1340 EXPORT_SYMBOL(cl_page_flush);
1341
1342 /**
1343 * Checks whether the page is protected by an extent lock of at least the
1344 * required mode.
1345 *
1346 * \return the same as in cl_page_operations::cpo_is_under_lock() method.
1347 * \see cl_page_operations::cpo_is_under_lock()
1348 */
1349 int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
1350 struct cl_page *page)
1351 {
1352 int rc;
1353
1354 PINVRNT(env, page, cl_page_invariant(page));
1355
1356 rc = CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_is_under_lock),
1357 (const struct lu_env *,
1358 const struct cl_page_slice *, struct cl_io *),
1359 io);
1360 PASSERT(env, page, rc != 0);
1361 return rc;
1362 }
1363 EXPORT_SYMBOL(cl_page_is_under_lock);
1364
1365 static int page_prune_cb(const struct lu_env *env, struct cl_io *io,
1366 struct cl_page *page, void *cbdata)
1367 {
1368 cl_page_own(env, io, page);
1369 cl_page_unmap(env, io, page);
1370 cl_page_discard(env, io, page);
1371 cl_page_disown(env, io, page);
1372 return CLP_GANG_OKAY;
1373 }
1374
1375 /**
1376 * Purges all cached pages belonging to the object \a clobj.
1377 */
1378 int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj)
1379 {
1380 struct cl_thread_info *info;
1381 struct cl_object *obj = cl_object_top(clobj);
1382 struct cl_io *io;
1383 int result;
1384
1385 info = cl_env_info(env);
1386 io = &info->clt_io;
1387
1388 /*
1389 * initialize the io. This is ugly since we never do IO in this
1390 * function, we just make cl_page_list functions happy. -jay
1391 */
1392 io->ci_obj = obj;
1393 io->ci_ignore_layout = 1;
1394 result = cl_io_init(env, io, CIT_MISC, obj);
1395 if (result != 0) {
1396 cl_io_fini(env, io);
1397 return io->ci_result;
1398 }
1399
1400 do {
1401 result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
1402 page_prune_cb, NULL);
1403 if (result == CLP_GANG_RESCHED)
1404 cond_resched();
1405 } while (result != CLP_GANG_OKAY);
1406
1407 cl_io_fini(env, io);
1408 return result;
1409 }
1410 EXPORT_SYMBOL(cl_pages_prune);
1411
1412 /**
1413 * Tells transfer engine that only part of a page is to be transmitted.
1414 *
1415 * \see cl_page_operations::cpo_clip()
1416 */
1417 void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
1418 int from, int to)
1419 {
1420 PINVRNT(env, pg, cl_page_invariant(pg));
1421
1422 CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
1423 CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
1424 (const struct lu_env *,
1425 const struct cl_page_slice *, int, int),
1426 from, to);
1427 }
1428 EXPORT_SYMBOL(cl_page_clip);
1429
1430 /**
1431 * Prints a human-readable representation of \a pg through \a printer.
1432 */
1433 void cl_page_header_print(const struct lu_env *env, void *cookie,
1434 lu_printer_t printer, const struct cl_page *pg)
1435 {
1436 (*printer)(env, cookie,
1437 "page@%p[%d %p:%lu ^%p_%p %d %d %d %p %p %#x]\n",
1438 pg, atomic_read(&pg->cp_ref), pg->cp_obj,
1439 pg->cp_index, pg->cp_parent, pg->cp_child,
1440 pg->cp_state, pg->cp_error, pg->cp_type,
1441 pg->cp_owner, pg->cp_req, pg->cp_flags);
1442 }
1443 EXPORT_SYMBOL(cl_page_header_print);
1444
1445 /**
1446 * Prints a human-readable representation of \a pg through \a printer.
1447 */
1448 void cl_page_print(const struct lu_env *env, void *cookie,
1449 lu_printer_t printer, const struct cl_page *pg)
1450 {
1451 struct cl_page *scan;
1452
1453 for (scan = cl_page_top((struct cl_page *)pg);
1454 scan != NULL; scan = scan->cp_child)
1455 cl_page_header_print(env, cookie, printer, scan);
1456 CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
1457 (const struct lu_env *env,
1458 const struct cl_page_slice *slice,
1459 void *cookie, lu_printer_t p), cookie, printer);
1460 (*printer)(env, cookie, "end page@%p\n", pg);
1461 }
1462 EXPORT_SYMBOL(cl_page_print);
1463
1464 /**
1465 * Cancel a page which is still in a transfer.
1466 */
1467 int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
1468 {
1469 return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
1470 (const struct lu_env *,
1471 const struct cl_page_slice *));
1472 }
1473 EXPORT_SYMBOL(cl_page_cancel);
1474
1475 /**
1476 * Converts a page index into a byte offset within object \a obj.
1477 */
1478 loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
1479 {
1480 /*
1481 * XXX for now.
1482 */
1483 return (loff_t)idx << PAGE_CACHE_SHIFT;
1484 }
1485 EXPORT_SYMBOL(cl_offset);
1486
1487 /**
1488 * Converts a byte offset within object \a obj into a page index.
1489 */
1490 pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
1491 {
1492 /*
1493 * XXX for now.
1494 */
1495 return offset >> PAGE_CACHE_SHIFT;
1496 }
1497 EXPORT_SYMBOL(cl_index);
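
/*
 * Example (arithmetic only, assuming PAGE_CACHE_SHIFT == 12, i.e. 4096-byte
 * pages): cl_offset() and cl_index() are inverse up to the offset within a
 * page:
 *
 *	cl_offset(obj, 3)    == 12288
 *	cl_index(obj, 12288) == 3
 *	cl_index(obj, 12289) == 3	(still inside page 3)
 */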
1498
1499 int cl_page_size(const struct cl_object *obj)
1500 {
1501 return 1 << PAGE_CACHE_SHIFT;
1502 }
1503 EXPORT_SYMBOL(cl_page_size);
1504
1505 /**
1506 * Adds page slice to the compound page.
1507 *
1508 * This is called by cl_object_operations::coo_page_init() methods to add a
1509 * per-layer state to the page. New state is added at the end of
1510 * cl_page::cp_layers list, that is, it is at the bottom of the stack.
1511 *
1512 * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
1513 */
1514 void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
1515 struct cl_object *obj,
1516 const struct cl_page_operations *ops)
1517 {
1518 list_add_tail(&slice->cpl_linkage, &page->cp_layers);
1519 slice->cpl_obj = obj;
1520 slice->cpl_ops = ops;
1521 slice->cpl_page = page;
1522 }
1523 EXPORT_SYMBOL(cl_page_slice_add);
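
/*
 * Sketch of how a layer typically uses cl_page_slice_add() from its
 * cl_object_operations::coo_page_init() method; the slice type, the way the
 * layer-private state is obtained and "my_page_ops" are hypothetical, for
 * illustration only:
 *
 *	struct my_page {
 *		struct cl_page_slice	mp_cl;
 *	};
 *
 *	static int my_page_init(const struct lu_env *env, struct cl_object *obj,
 *				struct cl_page *page, struct page *vmpage)
 *	{
 *		struct my_page *mp = ...;	(layer-private page state)
 *
 *		cl_page_slice_add(page, &mp->mp_cl, obj, &my_page_ops);
 *		return 0;
 *	}
 */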
1524
1525 int cl_page_init(void)
1526 {
1527 return 0;
1528 }
1529
1530 void cl_page_fini(void)
1531 {
1532 }
1533