/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2012, 2013, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/fld/fld_cache.c
 *
 * FLD (Fids Location Database)
 *
 * Author: Pravin Shelar <pravin.shelar@sun.com>
 * Author: Yury Umanets <umka@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_FLD

#include "../../include/linux/libcfs/libcfs.h"
#include <linux/module.h>
#include <asm/div64.h>

#include "../include/obd.h"
#include "../include/obd_class.h"
#include "../include/lustre_ver.h"
#include "../include/obd_support.h"
#include "../include/lprocfs_status.h"

#include "../include/lustre_req_layout.h"
#include "../include/lustre_fld.h"
#include "fld_internal.h"

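/*
 * Cache layout (as implemented below): every fld_cache_entry is linked on
 * two lists. cache->fci_entries_head keeps entries sorted by lsr_start so
 * that lookup and merge can walk the ranges in order, while cache->fci_lru
 * records insertion order for eviction. fld_cache_shrink() is called from
 * the insert and flush paths; once fci_cache_count reaches fci_cache_size
 * it walks fci_lru from the tail and frees entries until
 * fci_cache_count + fci_threshold no longer exceeds fci_cache_size.
 * Both lists are protected by the fci_lock rwlock.
 */
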
/**
 * Create FLD cache.
 */
struct fld_cache *fld_cache_init(const char *name,
				 int cache_size, int cache_threshold)
{
	struct fld_cache *cache;

	LASSERT(name != NULL);
	LASSERT(cache_threshold < cache_size);

	cache = kzalloc(sizeof(*cache), GFP_NOFS);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&cache->fci_entries_head);
	INIT_LIST_HEAD(&cache->fci_lru);

	cache->fci_cache_count = 0;
	rwlock_init(&cache->fci_lock);

	strlcpy(cache->fci_name, name,
		sizeof(cache->fci_name));

	cache->fci_cache_size = cache_size;
	cache->fci_threshold = cache_threshold;

	/* Init fld cache statistics. */
	memset(&cache->fci_stat, 0, sizeof(cache->fci_stat));

	CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n",
	       cache->fci_name, cache_size, cache_threshold);

	return cache;
}

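/*
 * Usage sketch for the create/destroy pair above (illustrative only; the
 * cache name and the FLD_CLIENT_CACHE_* constants are the caller's choice
 * and are assumed here, not defined in this file):
 *
 *	struct fld_cache *cache;
 *
 *	cache = fld_cache_init("cli-fld", FLD_CLIENT_CACHE_SIZE,
 *			       FLD_CLIENT_CACHE_THRESHOLD);
 *	if (IS_ERR(cache))
 *		return PTR_ERR(cache);
 *	...
 *	fld_cache_fini(cache);
 */
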
/**
 * Destroy FLD cache.
 */
void fld_cache_fini(struct fld_cache *cache)
{
	__u64 pct;

	LASSERT(cache != NULL);
	fld_cache_flush(cache);

	if (cache->fci_stat.fst_count > 0) {
		pct = cache->fci_stat.fst_cache * 100;
		do_div(pct, cache->fci_stat.fst_count);
	} else {
		pct = 0;
	}

	CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name);
	CDEBUG(D_INFO, "  Total reqs: %llu\n", cache->fci_stat.fst_count);
	CDEBUG(D_INFO, "  Cache reqs: %llu\n", cache->fci_stat.fst_cache);
	CDEBUG(D_INFO, "  Cache hits: %llu%%\n", pct);

	kfree(cache);
}

/**
 * Delete the given node from both cache lists and free it.
 */
void fld_cache_entry_delete(struct fld_cache *cache,
			    struct fld_cache_entry *node)
{
	list_del(&node->fce_list);
	list_del(&node->fce_lru);
	cache->fci_cache_count--;
	kfree(node);
}

/**
 * Fix up the list by checking each entry against the NEXT entry in order:
 * merge adjacent ranges, resolve overlaps and drop duplicates.
 */
static void fld_fix_new_list(struct fld_cache *cache)
{
	struct fld_cache_entry *f_curr;
	struct fld_cache_entry *f_next;
	struct lu_seq_range *c_range;
	struct lu_seq_range *n_range;
	struct list_head *head = &cache->fci_entries_head;

restart_fixup:

	list_for_each_entry_safe(f_curr, f_next, head, fce_list) {
		c_range = &f_curr->fce_range;
		n_range = &f_next->fce_range;

		LASSERT(range_is_sane(c_range));
		if (&f_next->fce_list == head)
			break;

		if (c_range->lsr_flags != n_range->lsr_flags)
			continue;

		LASSERTF(c_range->lsr_start <= n_range->lsr_start,
			 "cur lsr_start "DRANGE" next lsr_start "DRANGE"\n",
			 PRANGE(c_range), PRANGE(n_range));

		/* check merge possibility with next range */
		if (c_range->lsr_end == n_range->lsr_start) {
			if (c_range->lsr_index != n_range->lsr_index)
				continue;
			n_range->lsr_start = c_range->lsr_start;
			fld_cache_entry_delete(cache, f_curr);
			continue;
		}

		/* check if current range overlaps with next range. */
		if (n_range->lsr_start < c_range->lsr_end) {
			if (c_range->lsr_index == n_range->lsr_index) {
				n_range->lsr_start = c_range->lsr_start;
				n_range->lsr_end = max(c_range->lsr_end,
						       n_range->lsr_end);
				fld_cache_entry_delete(cache, f_curr);
			} else {
				if (n_range->lsr_end <= c_range->lsr_end) {
					*n_range = *c_range;
					fld_cache_entry_delete(cache, f_curr);
				} else
					n_range->lsr_start = c_range->lsr_end;
			}

			/* we could overlap with the next range too,
			 * so it is better to restart. */
			goto restart_fixup;
		}

		/* kill duplicates */
		if (c_range->lsr_start == n_range->lsr_start &&
		    c_range->lsr_end == n_range->lsr_end)
			fld_cache_entry_delete(cache, f_curr);
	}
}

/**
 * Add a node to the FLD cache and the LRU list, then fix up the ordered list.
 */
static inline void fld_cache_entry_add(struct fld_cache *cache,
				       struct fld_cache_entry *f_new,
				       struct list_head *pos)
{
	list_add(&f_new->fce_list, pos);
	list_add(&f_new->fce_lru, &cache->fci_lru);

	cache->fci_cache_count++;
	fld_fix_new_list(cache);
}

/**
 * Check if the cache needs to be shrunk. If so, remove entries from the
 * tail of the LRU list one at a time until the cache is small enough.
 */
static int fld_cache_shrink(struct fld_cache *cache)
{
	struct fld_cache_entry *flde;
	struct list_head *curr;
	int num = 0;

	LASSERT(cache != NULL);

	if (cache->fci_cache_count < cache->fci_cache_size)
		return 0;

	curr = cache->fci_lru.prev;

	while (cache->fci_cache_count + cache->fci_threshold >
	       cache->fci_cache_size && curr != &cache->fci_lru) {

		flde = list_entry(curr, struct fld_cache_entry, fce_lru);
		curr = curr->prev;
		fld_cache_entry_delete(cache, flde);
		num++;
	}

	CDEBUG(D_INFO, "%s: FLD cache - Shrunk by %d entries\n",
	       cache->fci_name, num);

	return 0;
}

/**
 * Remove all FLD cache entries.
 */
void fld_cache_flush(struct fld_cache *cache)
{
	write_lock(&cache->fci_lock);
	cache->fci_cache_size = 0;
	fld_cache_shrink(cache);
	write_unlock(&cache->fci_lock);
}

/**
 * Punch a hole in an existing range: split f_curr around f_new so that
 * f_curr keeps the head of the old range, f_new fills the hole, and a new
 * entry (fldt) keeps the tail.
 */
static void fld_cache_punch_hole(struct fld_cache *cache,
				 struct fld_cache_entry *f_curr,
				 struct fld_cache_entry *f_new)
{
	const struct lu_seq_range *range = &f_new->fce_range;
	const u64 new_start = range->lsr_start;
	const u64 new_end = range->lsr_end;
	struct fld_cache_entry *fldt;

	fldt = kzalloc(sizeof(*fldt), GFP_ATOMIC);
	if (!fldt) {
		kfree(f_new);
		/* overlap is not allowed, so don't mess up the list. */
		return;
	}
	/* break f_curr RANGE into three RANGES:
	 * f_curr, f_new, fldt
	 */

	/* f_new = *range */

	/* fldt */
	fldt->fce_range.lsr_start = new_end;
	fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end;
	fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index;

	/* f_curr */
	f_curr->fce_range.lsr_end = new_start;

	/* add these two entries to list */
	fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
	fld_cache_entry_add(cache, fldt, &f_new->fce_list);

	/* no need to fixup */
}

/**
 * Handle overlap of a new range with an existing cache entry.
 */
static void fld_cache_overlap_handle(struct fld_cache *cache,
				     struct fld_cache_entry *f_curr,
				     struct fld_cache_entry *f_new)
{
	const struct lu_seq_range *range = &f_new->fce_range;
	const u64 new_start = range->lsr_start;
	const u64 new_end = range->lsr_end;
	const u32 mdt = range->lsr_index;

	/* This is the overlap case; these cases only check overlap with the
	 * previous range. The fixup pass handles overlap with the next range. */

	if (f_curr->fce_range.lsr_index == mdt) {
		f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start,
						  new_start);

		f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end,
						new_end);

		kfree(f_new);
		fld_fix_new_list(cache);

	} else if (new_start <= f_curr->fce_range.lsr_start &&
		   f_curr->fce_range.lsr_end <= new_end) {
		/* case 1: the new range completely overshadows the existing
		 * range, e.g. the whole range migrated. Update the cache entry. */

		f_curr->fce_range = *range;
		kfree(f_new);
		fld_fix_new_list(cache);

	} else if (f_curr->fce_range.lsr_start < new_start &&
		   new_end < f_curr->fce_range.lsr_end) {
		/* case 2: the new range fits within the existing range. */

		fld_cache_punch_hole(cache, f_curr, f_new);

	} else if (new_end <= f_curr->fce_range.lsr_end) {
		/* case 3: overlap:
		 *	[new_start [c_start new_end) c_end)
		 */

		LASSERT(new_start <= f_curr->fce_range.lsr_start);

		f_curr->fce_range.lsr_start = new_end;
		fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev);

	} else if (f_curr->fce_range.lsr_start <= new_start) {
		/* case 4: overlap:
		 *	[c_start [new_start c_end) new_end)
		 */

		LASSERT(f_curr->fce_range.lsr_end <= new_end);

		f_curr->fce_range.lsr_end = new_start;
		fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
	} else
		CERROR("NEW range ="DRANGE" curr = "DRANGE"\n",
		       PRANGE(range), PRANGE(&f_curr->fce_range));
}

struct fld_cache_entry
*fld_cache_entry_create(const struct lu_seq_range *range)
{
	struct fld_cache_entry *f_new;

	LASSERT(range_is_sane(range));

	f_new = kzalloc(sizeof(*f_new), GFP_NOFS);
	if (!f_new)
		return ERR_PTR(-ENOMEM);

	f_new->fce_range = *range;
	return f_new;
}

/**
 * Insert FLD entry in FLD cache.
 *
 * This function handles all cases of merging and breaking up of
 * ranges.
 */
int fld_cache_insert_nolock(struct fld_cache *cache,
			    struct fld_cache_entry *f_new)
{
	struct fld_cache_entry *f_curr;
	struct fld_cache_entry *n;
	struct list_head *head;
	struct list_head *prev = NULL;
	const u64 new_start = f_new->fce_range.lsr_start;
	const u64 new_end = f_new->fce_range.lsr_end;
	__u32 new_flags = f_new->fce_range.lsr_flags;

	/*
	 * Duplicate entries are eliminated during insert, so there is no
	 * need to search for the new entry before starting the insertion
	 * loop.
	 */

	if (!cache->fci_no_shrink)
		fld_cache_shrink(cache);

	head = &cache->fci_entries_head;

	list_for_each_entry_safe(f_curr, n, head, fce_list) {
		/* the new range ends before this entry: insert in front of it */
		if (new_end < f_curr->fce_range.lsr_start ||
		    (new_end == f_curr->fce_range.lsr_start &&
		     new_flags != f_curr->fce_range.lsr_flags))
			break;

		prev = &f_curr->fce_list;
		/* check if the new range overlaps this entry. */
		if (new_start < f_curr->fce_range.lsr_end &&
		    new_flags == f_curr->fce_range.lsr_flags) {
			fld_cache_overlap_handle(cache, f_curr, f_new);
			goto out;
		}
	}

	if (prev == NULL)
		prev = head;

	CDEBUG(D_INFO, "insert range "DRANGE"\n", PRANGE(&f_new->fce_range));
	/* Add new entry to cache and lru list. */
	fld_cache_entry_add(cache, f_new, prev);
out:
	return 0;
}

int fld_cache_insert(struct fld_cache *cache,
		     const struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	int rc;

	flde = fld_cache_entry_create(range);
	if (IS_ERR(flde))
		return PTR_ERR(flde);

	write_lock(&cache->fci_lock);
	rc = fld_cache_insert_nolock(cache, flde);
	write_unlock(&cache->fci_lock);
	if (rc)
		kfree(flde);

	return rc;
}

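/*
 * Usage sketch for fld_cache_insert() (illustrative only; the sequence
 * bounds, the MDT index and the LU_SEQ_RANGE_MDT flag are example inputs
 * chosen by the caller, not defined in this file):
 *
 *	struct lu_seq_range range = {
 *		.lsr_start = seq_start,
 *		.lsr_end   = seq_end,
 *		.lsr_index = mdt_index,
 *		.lsr_flags = LU_SEQ_RANGE_MDT,
 *	};
 *	int rc;
 *
 *	rc = fld_cache_insert(cache, &range);
 *	if (rc)
 *		CERROR("failed to cache range: rc = %d\n", rc);
 */
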
void fld_cache_delete_nolock(struct fld_cache *cache,
			     const struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	struct fld_cache_entry *tmp;
	struct list_head *head;

	head = &cache->fci_entries_head;
	list_for_each_entry_safe(flde, tmp, head, fce_list) {
		/* found a matching entry: delete it and stop */
		if (range->lsr_start == flde->fce_range.lsr_start ||
		    (range->lsr_end == flde->fce_range.lsr_end &&
		     range->lsr_flags == flde->fce_range.lsr_flags)) {
			fld_cache_entry_delete(cache, flde);
			break;
		}
	}
}

/**
 * Delete FLD entry in FLD cache.
 */
void fld_cache_delete(struct fld_cache *cache,
		      const struct lu_seq_range *range)
{
	write_lock(&cache->fci_lock);
	fld_cache_delete_nolock(cache, range);
	write_unlock(&cache->fci_lock);
}

struct fld_cache_entry
*fld_cache_entry_lookup_nolock(struct fld_cache *cache,
			       struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	struct fld_cache_entry *got = NULL;
	struct list_head *head;

	head = &cache->fci_entries_head;
	list_for_each_entry(flde, head, fce_list) {
		if (range->lsr_start == flde->fce_range.lsr_start ||
		    (range->lsr_end == flde->fce_range.lsr_end &&
		     range->lsr_flags == flde->fce_range.lsr_flags)) {
			got = flde;
			break;
		}
	}

	return got;
}

/**
 * Lookup the cache entry matching \a range in the FLD cache.
 */
struct fld_cache_entry
*fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range)
{
	struct fld_cache_entry *got = NULL;

	read_lock(&cache->fci_lock);
	got = fld_cache_entry_lookup_nolock(cache, range);
	read_unlock(&cache->fci_lock);
	return got;
}

/**
 * Lookup the range containing sequence \a seq in the FLD cache.
 */
int fld_cache_lookup(struct fld_cache *cache,
		     const u64 seq, struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	struct fld_cache_entry *prev = NULL;
	struct list_head *head;

	read_lock(&cache->fci_lock);
	head = &cache->fci_entries_head;

	cache->fci_stat.fst_count++;
	list_for_each_entry(flde, head, fce_list) {
		if (flde->fce_range.lsr_start > seq) {
			/* no match; pass back the nearest preceding range
			 * as a hint, but still report a miss. */
			if (prev != NULL)
				*range = prev->fce_range;
			break;
		}

		prev = flde;
		if (range_within(&flde->fce_range, seq)) {
			*range = flde->fce_range;

			cache->fci_stat.fst_cache++;
			read_unlock(&cache->fci_lock);
			return 0;
		}
	}
	read_unlock(&cache->fci_lock);
	return -ENOENT;
}

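/*
 * Usage sketch for fld_cache_lookup() (illustrative only; fid_seq() and
 * mdt_index below are assumptions about the caller, not part of this file):
 *
 *	struct lu_seq_range range;
 *	int rc;
 *
 *	rc = fld_cache_lookup(cache, fid_seq(fid), &range);
 *	if (rc == 0)
 *		mdt_index = range.lsr_index;
 *
 * A return of -ENOENT means a cache miss; callers typically fall back to an
 * FLD RPC and then fld_cache_insert() the result.
 */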