1 /*
2 * Copyright (c) 2014 Christoph Hellwig.
3 */
4 #include <linux/sunrpc/svc.h>
5 #include <linux/blkdev.h>
6 #include <linux/nfs4.h>
7 #include <linux/nfs_fs.h>
8 #include <linux/nfs_xdr.h>
9
10 #include "blocklayout.h"
11
12 #define NFSDBG_FACILITY NFSDBG_PNFS_LD
13
14 static void
bl_free_device(struct pnfs_block_dev * dev)15 bl_free_device(struct pnfs_block_dev *dev)
16 {
17 if (dev->nr_children) {
18 int i;
19
20 for (i = 0; i < dev->nr_children; i++)
21 bl_free_device(&dev->children[i]);
22 kfree(dev->children);
23 } else {
24 if (dev->bdev)
25 blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
26 }
27 }
28
29 void
bl_free_deviceid_node(struct nfs4_deviceid_node * d)30 bl_free_deviceid_node(struct nfs4_deviceid_node *d)
31 {
32 struct pnfs_block_dev *dev =
33 container_of(d, struct pnfs_block_dev, node);
34
35 bl_free_device(dev);
36 kfree_rcu(dev, node.rcu);
37 }
38
39 static int
nfs4_block_decode_volume(struct xdr_stream * xdr,struct pnfs_block_volume * b)40 nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
41 {
42 __be32 *p;
43 int i;
44
45 p = xdr_inline_decode(xdr, 4);
46 if (!p)
47 return -EIO;
48 b->type = be32_to_cpup(p++);
49
50 switch (b->type) {
51 case PNFS_BLOCK_VOLUME_SIMPLE:
52 p = xdr_inline_decode(xdr, 4);
53 if (!p)
54 return -EIO;
55 b->simple.nr_sigs = be32_to_cpup(p++);
56 if (!b->simple.nr_sigs) {
57 dprintk("no signature\n");
58 return -EIO;
59 }
60
61 b->simple.len = 4 + 4;
62 for (i = 0; i < b->simple.nr_sigs; i++) {
63 p = xdr_inline_decode(xdr, 8 + 4);
64 if (!p)
65 return -EIO;
66 p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
67 b->simple.sigs[i].sig_len = be32_to_cpup(p++);
68 if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
69 pr_info("signature too long: %d\n",
70 b->simple.sigs[i].sig_len);
71 return -EIO;
72 }
73
74 p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
75 if (!p)
76 return -EIO;
77 memcpy(&b->simple.sigs[i].sig, p,
78 b->simple.sigs[i].sig_len);
79
80 b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
81 }
82 break;
83 case PNFS_BLOCK_VOLUME_SLICE:
84 p = xdr_inline_decode(xdr, 8 + 8 + 4);
85 if (!p)
86 return -EIO;
87 p = xdr_decode_hyper(p, &b->slice.start);
88 p = xdr_decode_hyper(p, &b->slice.len);
89 b->slice.volume = be32_to_cpup(p++);
90 break;
91 case PNFS_BLOCK_VOLUME_CONCAT:
92 p = xdr_inline_decode(xdr, 4);
93 if (!p)
94 return -EIO;
95 b->concat.volumes_count = be32_to_cpup(p++);
96
97 p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
98 if (!p)
99 return -EIO;
100 for (i = 0; i < b->concat.volumes_count; i++)
101 b->concat.volumes[i] = be32_to_cpup(p++);
102 break;
103 case PNFS_BLOCK_VOLUME_STRIPE:
104 p = xdr_inline_decode(xdr, 8 + 4);
105 if (!p)
106 return -EIO;
107 p = xdr_decode_hyper(p, &b->stripe.chunk_size);
108 b->stripe.volumes_count = be32_to_cpup(p++);
109
110 p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
111 if (!p)
112 return -EIO;
113 for (i = 0; i < b->stripe.volumes_count; i++)
114 b->stripe.volumes[i] = be32_to_cpup(p++);
115 break;
116 default:
117 dprintk("unknown volume type!\n");
118 return -EIO;
119 }
120
121 return 0;
122 }
123
bl_map_simple(struct pnfs_block_dev * dev,u64 offset,struct pnfs_block_dev_map * map)124 static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
125 struct pnfs_block_dev_map *map)
126 {
127 map->start = dev->start;
128 map->len = dev->len;
129 map->disk_offset = dev->disk_offset;
130 map->bdev = dev->bdev;
131 return true;
132 }
133
bl_map_concat(struct pnfs_block_dev * dev,u64 offset,struct pnfs_block_dev_map * map)134 static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
135 struct pnfs_block_dev_map *map)
136 {
137 int i;
138
139 for (i = 0; i < dev->nr_children; i++) {
140 struct pnfs_block_dev *child = &dev->children[i];
141
142 if (child->start > offset ||
143 child->start + child->len <= offset)
144 continue;
145
146 child->map(child, offset - child->start, map);
147 return true;
148 }
149
150 dprintk("%s: ran off loop!\n", __func__);
151 return false;
152 }
153
bl_map_stripe(struct pnfs_block_dev * dev,u64 offset,struct pnfs_block_dev_map * map)154 static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
155 struct pnfs_block_dev_map *map)
156 {
157 struct pnfs_block_dev *child;
158 u64 chunk;
159 u32 chunk_idx;
160 u64 disk_offset;
161
162 chunk = div_u64(offset, dev->chunk_size);
163 div_u64_rem(chunk, dev->nr_children, &chunk_idx);
164
165 if (chunk_idx >= dev->nr_children) {
166 dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
167 __func__, chunk_idx, offset, dev->chunk_size);
168 /* error, should not happen */
169 return false;
170 }
171
172 /* truncate offset to the beginning of the stripe */
173 offset = chunk * dev->chunk_size;
174
175 /* disk offset of the stripe */
176 disk_offset = div_u64(offset, dev->nr_children);
177
178 child = &dev->children[chunk_idx];
179 child->map(child, disk_offset, map);
180
181 map->start += offset;
182 map->disk_offset += disk_offset;
183 map->len = dev->chunk_size;
184 return true;
185 }
186
/*
 * Forward declaration: the slice, concat and stripe parsers below call
 * bl_parse_deviceid() for the volumes they refer to, and
 * bl_parse_deviceid() in turn dispatches back to those parsers.
 */
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
190
191
192 static int
bl_parse_simple(struct nfs_server * server,struct pnfs_block_dev * d,struct pnfs_block_volume * volumes,int idx,gfp_t gfp_mask)193 bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
194 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
195 {
196 struct pnfs_block_volume *v = &volumes[idx];
197 dev_t dev;
198
199 dev = bl_resolve_deviceid(server, v, gfp_mask);
200 if (!dev)
201 return -EIO;
202
203 d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
204 if (IS_ERR(d->bdev)) {
205 printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
206 MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
207 return PTR_ERR(d->bdev);
208 }
209
210
211 d->len = i_size_read(d->bdev->bd_inode);
212 d->map = bl_map_simple;
213
214 printk(KERN_INFO "pNFS: using block device %s\n",
215 d->bdev->bd_disk->disk_name);
216 return 0;
217 }
218
219 static int
bl_parse_slice(struct nfs_server * server,struct pnfs_block_dev * d,struct pnfs_block_volume * volumes,int idx,gfp_t gfp_mask)220 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
221 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
222 {
223 struct pnfs_block_volume *v = &volumes[idx];
224 int ret;
225
226 ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
227 if (ret)
228 return ret;
229
230 d->disk_offset = v->slice.start;
231 d->len = v->slice.len;
232 return 0;
233 }
234
235 static int
bl_parse_concat(struct nfs_server * server,struct pnfs_block_dev * d,struct pnfs_block_volume * volumes,int idx,gfp_t gfp_mask)236 bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
237 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
238 {
239 struct pnfs_block_volume *v = &volumes[idx];
240 u64 len = 0;
241 int ret, i;
242
243 d->children = kcalloc(v->concat.volumes_count,
244 sizeof(struct pnfs_block_dev), GFP_KERNEL);
245 if (!d->children)
246 return -ENOMEM;
247
248 for (i = 0; i < v->concat.volumes_count; i++) {
249 ret = bl_parse_deviceid(server, &d->children[i],
250 volumes, v->concat.volumes[i], gfp_mask);
251 if (ret)
252 return ret;
253
254 d->nr_children++;
255 d->children[i].start += len;
256 len += d->children[i].len;
257 }
258
259 d->len = len;
260 d->map = bl_map_concat;
261 return 0;
262 }
263
264 static int
bl_parse_stripe(struct nfs_server * server,struct pnfs_block_dev * d,struct pnfs_block_volume * volumes,int idx,gfp_t gfp_mask)265 bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
266 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
267 {
268 struct pnfs_block_volume *v = &volumes[idx];
269 u64 len = 0;
270 int ret, i;
271
272 d->children = kcalloc(v->stripe.volumes_count,
273 sizeof(struct pnfs_block_dev), GFP_KERNEL);
274 if (!d->children)
275 return -ENOMEM;
276
277 for (i = 0; i < v->stripe.volumes_count; i++) {
278 ret = bl_parse_deviceid(server, &d->children[i],
279 volumes, v->stripe.volumes[i], gfp_mask);
280 if (ret)
281 return ret;
282
283 d->nr_children++;
284 len += d->children[i].len;
285 }
286
287 d->len = len;
288 d->chunk_size = v->stripe.chunk_size;
289 d->map = bl_map_stripe;
290 return 0;
291 }
292
293 static int
bl_parse_deviceid(struct nfs_server * server,struct pnfs_block_dev * d,struct pnfs_block_volume * volumes,int idx,gfp_t gfp_mask)294 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
295 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
296 {
297 switch (volumes[idx].type) {
298 case PNFS_BLOCK_VOLUME_SIMPLE:
299 return bl_parse_simple(server, d, volumes, idx, gfp_mask);
300 case PNFS_BLOCK_VOLUME_SLICE:
301 return bl_parse_slice(server, d, volumes, idx, gfp_mask);
302 case PNFS_BLOCK_VOLUME_CONCAT:
303 return bl_parse_concat(server, d, volumes, idx, gfp_mask);
304 case PNFS_BLOCK_VOLUME_STRIPE:
305 return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
306 default:
307 dprintk("unsupported volume type: %d\n", volumes[idx].type);
308 return -EIO;
309 }
310 }
311
312 struct nfs4_deviceid_node *
bl_alloc_deviceid_node(struct nfs_server * server,struct pnfs_device * pdev,gfp_t gfp_mask)313 bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
314 gfp_t gfp_mask)
315 {
316 struct nfs4_deviceid_node *node = NULL;
317 struct pnfs_block_volume *volumes;
318 struct pnfs_block_dev *top;
319 struct xdr_stream xdr;
320 struct xdr_buf buf;
321 struct page *scratch;
322 int nr_volumes, ret, i;
323 __be32 *p;
324
325 scratch = alloc_page(gfp_mask);
326 if (!scratch)
327 goto out;
328
329 xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
330 xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
331
332 p = xdr_inline_decode(&xdr, sizeof(__be32));
333 if (!p)
334 goto out_free_scratch;
335 nr_volumes = be32_to_cpup(p++);
336
337 volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
338 gfp_mask);
339 if (!volumes)
340 goto out_free_scratch;
341
342 for (i = 0; i < nr_volumes; i++) {
343 ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
344 if (ret < 0)
345 goto out_free_volumes;
346 }
347
348 top = kzalloc(sizeof(*top), gfp_mask);
349 if (!top)
350 goto out_free_volumes;
351
352 ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
353 if (ret) {
354 bl_free_device(top);
355 kfree(top);
356 goto out_free_volumes;
357 }
358
359 node = &top->node;
360 nfs4_init_deviceid_node(node, server, &pdev->dev_id);
361
362 out_free_volumes:
363 kfree(volumes);
364 out_free_scratch:
365 __free_page(scratch);
366 out:
367 return node;
368 }
369