Lines Matching +full:dout +full:- +full:default +full:- +full:2
3 rbd.c -- Export ceph rados objects as a Linux block device
27 Documentation/ABI/testing/sysfs-bus-rbd
43 #include <linux/blk-mq.h>
58 * -EINVAL without updating it.
70 return -EINVAL; in atomic_inc_return_safe()
73 /* Decrement the counter. Return the resulting value, or -EINVAL */
84 return -EINVAL; in atomic_dec_return_safe()
96 (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
100 #define RBD_SNAP_HEAD_NAME "-"
105 #define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1)
117 #define RBD_FEATURE_EXCLUSIVE_LOCK (1ULL<<2)
144 * block device image metadata (in-memory version)
170 * user-mapped image, the names are supplied and the id's associated
175 * non-null if the image it represents is a child in a layered
190 const char *pool_ns; /* NULL if default, never "" */
233 #define RBD_OBJ_FLAG_COPYUP_ZEROS (1U << 2)
252 * . v v (deep-copyup .
352 list_for_each_entry(oreq, &(ireq)->object_extents, ex.oe_item)
354 list_for_each_entry_safe(oreq, n, &(ireq)->object_extents, ex.oe_item)
388 u32 image_format; /* Either 1 or 2 */
455 * Flag bits for rbd_dev->flags:
456 * - REMOVING (which is coupled with rbd_dev->open_count) is protected
457 * by rbd_dev->lock
462 RBD_DEV_FLAG_READONLY, /* -o ro or snapshot */
473 /* Slab caches for frequently-allocated structures */
488 * single-major requires >= 0.75 version of userspace rbd utility.
492 MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
515 return test_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags); in rbd_is_ro()
520 return rbd_dev->spec->snap_id != CEPH_NOSNAP; in rbd_is_snap()
525 lockdep_assert_held(&rbd_dev->lock_rwsem); in __rbd_is_lock_owner()
527 return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED || in __rbd_is_lock_owner()
528 rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING; in __rbd_is_lock_owner()
535 down_read(&rbd_dev->lock_rwsem); in rbd_is_lock_owner()
537 up_read(&rbd_dev->lock_rwsem); in rbd_is_lock_owner()
569 return attr->mode; in rbd_bus_is_visible()
592 static __printf(2, 3)
604 else if (rbd_dev->disk) in rbd_warn()
606 RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); in rbd_warn()
607 else if (rbd_dev->spec && rbd_dev->spec->image_name) in rbd_warn()
609 RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); in rbd_warn()
610 else if (rbd_dev->spec && rbd_dev->spec->image_id) in rbd_warn()
612 RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); in rbd_warn()
651 rbd_assert(pending->num_pending > 0); in pending_result_dec()
653 if (*result && !pending->result) in pending_result_dec()
654 pending->result = *result; in pending_result_dec()
655 if (--pending->num_pending) in pending_result_dec()
658 *result = pending->result; in pending_result_dec()
664 struct rbd_device *rbd_dev = disk->private_data; in rbd_open()
667 spin_lock_irq(&rbd_dev->lock); in rbd_open()
668 if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) in rbd_open()
671 rbd_dev->open_count++; in rbd_open()
672 spin_unlock_irq(&rbd_dev->lock); in rbd_open()
674 return -ENOENT; in rbd_open()
676 (void) get_device(&rbd_dev->dev); in rbd_open()
683 struct rbd_device *rbd_dev = disk->private_data; in rbd_release()
686 spin_lock_irq(&rbd_dev->lock); in rbd_release()
687 open_count_before = rbd_dev->open_count--; in rbd_release()
688 spin_unlock_irq(&rbd_dev->lock); in rbd_release()
691 put_device(&rbd_dev->dev); in rbd_release()
707 int ret = -ENOMEM; in rbd_client_create()
709 dout("%s:\n", __func__); in rbd_client_create()
714 kref_init(&rbdc->kref); in rbd_client_create()
715 INIT_LIST_HEAD(&rbdc->node); in rbd_client_create()
717 rbdc->client = ceph_create_client(ceph_opts, rbdc); in rbd_client_create()
718 if (IS_ERR(rbdc->client)) in rbd_client_create()
720 ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ in rbd_client_create()
722 ret = ceph_open_session(rbdc->client); in rbd_client_create()
727 list_add_tail(&rbdc->node, &rbd_client_list); in rbd_client_create()
730 dout("%s: rbdc %p\n", __func__, rbdc); in rbd_client_create()
734 ceph_destroy_client(rbdc->client); in rbd_client_create()
740 dout("%s: error %d\n", __func__, ret); in rbd_client_create()
747 kref_get(&rbdc->kref); in __rbd_get_client()
760 if (ceph_opts->flags & CEPH_OPT_NOSHARE) in rbd_client_find()
765 if (!ceph_compare_options(ceph_opts, iter->client)) { in rbd_client_find()
862 default: in obj_op_name()
876 dout("%s: rbdc %p\n", __func__, rbdc); in rbd_client_release()
878 list_del(&rbdc->node); in rbd_client_release()
881 ceph_destroy_client(rbdc->client); in rbd_client_release()
892 kref_put(&rbdc->kref, rbd_client_release); in rbd_put_client()
911 * Using an existing client. Make sure ->pg_pools is up to in rbd_get_client()
914 ret = ceph_wait_for_latest_osdmap(rbdc->client, in rbd_get_client()
915 rbdc->client->options->mount_timeout); in rbd_get_client()
931 return image_format == 1 || image_format == 2; in rbd_image_format_valid()
940 if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) in rbd_dev_ondisk_valid()
943 /* The bio layer requires at least sector-sized I/O */ in rbd_dev_ondisk_valid()
945 if (ondisk->options.order < SECTOR_SHIFT) in rbd_dev_ondisk_valid()
950 if (ondisk->options.order > 8 * sizeof (int) - 1) in rbd_dev_ondisk_valid()
957 snap_count = le32_to_cpu(ondisk->snap_count); in rbd_dev_ondisk_valid()
958 size = SIZE_MAX - sizeof (struct ceph_snap_context); in rbd_dev_ondisk_valid()
966 size -= snap_count * sizeof (__le64); in rbd_dev_ondisk_valid()
967 if ((u64) size < le64_to_cpu(ondisk->snap_names_len)) in rbd_dev_ondisk_valid()
978 return 1U << header->obj_order; in rbd_obj_bytes()
983 if (rbd_dev->header.stripe_unit == 0 || in rbd_init_layout()
984 rbd_dev->header.stripe_count == 0) { in rbd_init_layout()
985 rbd_dev->header.stripe_unit = rbd_obj_bytes(&rbd_dev->header); in rbd_init_layout()
986 rbd_dev->header.stripe_count = 1; in rbd_init_layout()
989 rbd_dev->layout.stripe_unit = rbd_dev->header.stripe_unit; in rbd_init_layout()
990 rbd_dev->layout.stripe_count = rbd_dev->header.stripe_count; in rbd_init_layout()
991 rbd_dev->layout.object_size = rbd_obj_bytes(&rbd_dev->header); in rbd_init_layout()
992 rbd_dev->layout.pool_id = rbd_dev->header.data_pool_id == CEPH_NOPOOL ? in rbd_init_layout()
993 rbd_dev->spec->pool_id : rbd_dev->header.data_pool_id; in rbd_init_layout()
994 RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); in rbd_init_layout()
999 kfree(header->object_prefix); in rbd_image_header_cleanup()
1000 ceph_put_snap_context(header->snapc); in rbd_image_header_cleanup()
1001 kfree(header->snap_sizes); in rbd_image_header_cleanup()
1002 kfree(header->snap_names); in rbd_image_header_cleanup()
1009 * on-disk header.
1020 int ret = -ENOMEM; in rbd_header_from_disk()
1026 object_prefix = kstrndup(ondisk->object_prefix, in rbd_header_from_disk()
1027 sizeof(ondisk->object_prefix), in rbd_header_from_disk()
1030 return -ENOMEM; in rbd_header_from_disk()
1035 snap_count = le32_to_cpu(ondisk->snap_count); in rbd_header_from_disk()
1039 snapc->seq = le64_to_cpu(ondisk->snap_seq); in rbd_header_from_disk()
1042 u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len); in rbd_header_from_disk()
1054 sizeof(*header->snap_sizes), in rbd_header_from_disk()
1068 memcpy(snap_names, &ondisk->snaps[snap_count], snap_names_len); in rbd_header_from_disk()
1069 snaps = ondisk->snaps; in rbd_header_from_disk()
1071 snapc->snaps[i] = le64_to_cpu(snaps[i].id); in rbd_header_from_disk()
1079 header->object_prefix = object_prefix; in rbd_header_from_disk()
1080 header->obj_order = ondisk->options.order; in rbd_header_from_disk()
1085 header->image_size = le64_to_cpu(ondisk->image_size); in rbd_header_from_disk()
1086 header->snapc = snapc; in rbd_header_from_disk()
1087 header->snap_names = snap_names; in rbd_header_from_disk()
1088 header->snap_sizes = snap_sizes; in rbd_header_from_disk()
1092 ret = -EIO; in rbd_header_from_disk()
1106 rbd_assert(which < rbd_dev->header.snapc->num_snaps); in _rbd_dev_v1_snap_name()
1110 snap_name = rbd_dev->header.snap_names; in _rbd_dev_v1_snap_name()
1111 while (which--) in _rbd_dev_v1_snap_name()
1128 return snap_id1 == snap_id2 ? 0 : -1; in snapid_compare_reverse()
1143 struct ceph_snap_context *snapc = rbd_dev->header.snapc; in rbd_dev_snap_index()
1146 found = bsearch(&snap_id, &snapc->snaps, snapc->num_snaps, in rbd_dev_snap_index()
1149 return found ? (u32)(found - &snapc->snaps[0]) : BAD_SNAP_INDEX; in rbd_dev_snap_index()
1160 return ERR_PTR(-ENOENT); in rbd_dev_v1_snap_name()
1163 return snap_name ? snap_name : ERR_PTR(-ENOMEM); in rbd_dev_v1_snap_name()
1171 rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); in rbd_snap_name()
1172 if (rbd_dev->image_format == 1) in rbd_snap_name()
1181 rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); in rbd_snap_size()
1183 *snap_size = rbd_dev->header.image_size; in rbd_snap_size()
1184 } else if (rbd_dev->image_format == 1) { in rbd_snap_size()
1189 return -ENOENT; in rbd_snap_size()
1191 *snap_size = rbd_dev->header.snap_sizes[which]; in rbd_snap_size()
1207 u64 snap_id = rbd_dev->spec->snap_id; in rbd_dev_mapping_set()
1215 rbd_dev->mapping.size = size; in rbd_dev_mapping_set()
1221 rbd_dev->mapping.size = 0; in rbd_dev_mapping_clear()
1253 dout("%s %p data buf %u~%u\n", __func__, obj_req, off, bytes); in rbd_obj_zero_range()
1255 switch (obj_req->img_request->data_type) { in rbd_obj_zero_range()
1257 zero_bios(&obj_req->bio_pos, off, bytes); in rbd_obj_zero_range()
1261 zero_bvecs(&obj_req->bvec_pos, off, bytes); in rbd_obj_zero_range()
1263 default: in rbd_obj_zero_range()
1272 dout("%s: obj %p (was %d)\n", __func__, obj_request, in rbd_obj_request_put()
1273 kref_read(&obj_request->kref)); in rbd_obj_request_put()
1274 kref_put(&obj_request->kref, rbd_obj_request_destroy); in rbd_obj_request_put()
1280 rbd_assert(obj_request->img_request == NULL); in rbd_img_obj_request_add()
1283 obj_request->img_request = img_request; in rbd_img_obj_request_add()
1284 dout("%s: img %p obj %p\n", __func__, img_request, obj_request); in rbd_img_obj_request_add()
1290 dout("%s: img %p obj %p\n", __func__, img_request, obj_request); in rbd_img_obj_request_del()
1291 list_del(&obj_request->ex.oe_item); in rbd_img_obj_request_del()
1292 rbd_assert(obj_request->img_request == img_request); in rbd_img_obj_request_del()
1298 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_osd_submit()
1300 dout("%s osd_req %p for obj_req %p objno %llu %llu~%llu\n", in rbd_osd_submit()
1301 __func__, osd_req, obj_req, obj_req->ex.oe_objno, in rbd_osd_submit()
1302 obj_req->ex.oe_off, obj_req->ex.oe_len); in rbd_osd_submit()
1303 ceph_osdc_start_request(osd_req->r_osdc, osd_req); in rbd_osd_submit()
1307 * The default/initial value for all image request flags is 0. Each
1313 set_bit(IMG_REQ_LAYERED, &img_request->flags); in img_request_layered_set()
1318 return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0; in img_request_layered_test()
1323 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_is_entire()
1325 return !obj_req->ex.oe_off && in rbd_obj_is_entire()
1326 obj_req->ex.oe_len == rbd_dev->layout.object_size; in rbd_obj_is_entire()
1331 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_is_tail()
1333 return obj_req->ex.oe_off + obj_req->ex.oe_len == in rbd_obj_is_tail()
1334 rbd_dev->layout.object_size; in rbd_obj_is_tail()
1342 rbd_assert(obj_req->img_request->snapc); in rbd_obj_set_copyup_enabled()
1344 if (obj_req->img_request->op_type == OBJ_OP_DISCARD) { in rbd_obj_set_copyup_enabled()
1345 dout("%s %p objno %llu discard\n", __func__, obj_req, in rbd_obj_set_copyup_enabled()
1346 obj_req->ex.oe_objno); in rbd_obj_set_copyup_enabled()
1350 if (!obj_req->num_img_extents) { in rbd_obj_set_copyup_enabled()
1351 dout("%s %p objno %llu not overlapping\n", __func__, obj_req, in rbd_obj_set_copyup_enabled()
1352 obj_req->ex.oe_objno); in rbd_obj_set_copyup_enabled()
1357 !obj_req->img_request->snapc->num_snaps) { in rbd_obj_set_copyup_enabled()
1358 dout("%s %p objno %llu entire\n", __func__, obj_req, in rbd_obj_set_copyup_enabled()
1359 obj_req->ex.oe_objno); in rbd_obj_set_copyup_enabled()
1363 obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED; in rbd_obj_set_copyup_enabled()
1368 return ceph_file_extents_bytes(obj_req->img_extents, in rbd_obj_img_extents_bytes()
1369 obj_req->num_img_extents); in rbd_obj_img_extents_bytes()
1374 switch (img_req->op_type) { in rbd_img_is_write()
1381 default: in rbd_img_is_write()
1388 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_osd_req_callback()
1391 dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req, in rbd_osd_req_callback()
1392 osd_req->r_result, obj_req); in rbd_osd_req_callback()
1399 if (osd_req->r_result > 0 && rbd_img_is_write(obj_req->img_request)) in rbd_osd_req_callback()
1402 result = osd_req->r_result; in rbd_osd_req_callback()
1409 struct rbd_obj_request *obj_request = osd_req->r_priv; in rbd_osd_format_read()
1410 struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; in rbd_osd_format_read()
1411 struct ceph_options *opt = rbd_dev->rbd_client->client->options; in rbd_osd_format_read()
1413 osd_req->r_flags = CEPH_OSD_FLAG_READ | opt->read_from_replica; in rbd_osd_format_read()
1414 osd_req->r_snapid = obj_request->img_request->snap_id; in rbd_osd_format_read()
1419 struct rbd_obj_request *obj_request = osd_req->r_priv; in rbd_osd_format_write()
1421 osd_req->r_flags = CEPH_OSD_FLAG_WRITE; in rbd_osd_format_write()
1422 ktime_get_real_ts64(&osd_req->r_mtime); in rbd_osd_format_write()
1423 osd_req->r_data_offset = obj_request->ex.oe_off; in rbd_osd_format_write()
1430 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in __rbd_obj_add_osd_request()
1431 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_obj_add_osd_request()
1433 const char *name_format = rbd_dev->image_format == 1 ? in __rbd_obj_add_osd_request()
1439 return ERR_PTR(-ENOMEM); in __rbd_obj_add_osd_request()
1441 list_add_tail(&req->r_private_item, &obj_req->osd_reqs); in __rbd_obj_add_osd_request()
1442 req->r_callback = rbd_osd_req_callback; in __rbd_obj_add_osd_request()
1443 req->r_priv = obj_req; in __rbd_obj_add_osd_request()
1449 ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc); in __rbd_obj_add_osd_request()
1450 req->r_base_oloc.pool = rbd_dev->layout.pool_id; in __rbd_obj_add_osd_request()
1452 ret = ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format, in __rbd_obj_add_osd_request()
1453 rbd_dev->header.object_prefix, in __rbd_obj_add_osd_request()
1454 obj_req->ex.oe_objno); in __rbd_obj_add_osd_request()
1464 rbd_assert(obj_req->img_request->snapc); in rbd_obj_add_osd_request()
1465 return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc, in rbd_obj_add_osd_request()
1477 ceph_object_extent_init(&obj_request->ex); in rbd_obj_request_create()
1478 INIT_LIST_HEAD(&obj_request->osd_reqs); in rbd_obj_request_create()
1479 mutex_init(&obj_request->state_mutex); in rbd_obj_request_create()
1480 kref_init(&obj_request->kref); in rbd_obj_request_create()
1482 dout("%s %p\n", __func__, obj_request); in rbd_obj_request_create()
1494 dout("%s: obj %p\n", __func__, obj_request); in rbd_obj_request_destroy()
1496 while (!list_empty(&obj_request->osd_reqs)) { in rbd_obj_request_destroy()
1497 osd_req = list_first_entry(&obj_request->osd_reqs, in rbd_obj_request_destroy()
1499 list_del_init(&osd_req->r_private_item); in rbd_obj_request_destroy()
1503 switch (obj_request->img_request->data_type) { in rbd_obj_request_destroy()
1509 kfree(obj_request->bvec_pos.bvecs); in rbd_obj_request_destroy()
1511 default: in rbd_obj_request_destroy()
1515 kfree(obj_request->img_extents); in rbd_obj_request_destroy()
1516 if (obj_request->copyup_bvecs) { in rbd_obj_request_destroy()
1517 for (i = 0; i < obj_request->copyup_bvec_count; i++) { in rbd_obj_request_destroy()
1518 if (obj_request->copyup_bvecs[i].bv_page) in rbd_obj_request_destroy()
1519 __free_page(obj_request->copyup_bvecs[i].bv_page); in rbd_obj_request_destroy()
1521 kfree(obj_request->copyup_bvecs); in rbd_obj_request_destroy()
1533 rbd_spec_put(rbd_dev->parent_spec); in rbd_dev_unparent()
1534 rbd_dev->parent_spec = NULL; in rbd_dev_unparent()
1535 rbd_dev->parent_overlap = 0; in rbd_dev_unparent()
1540 * image's parent fields can be safely torn down--after there are no
1541 * more in-flight requests to the parent image. When the last
1548 if (!rbd_dev->parent_spec) in rbd_dev_parent_put()
1551 counter = atomic_dec_return_safe(&rbd_dev->parent_ref); in rbd_dev_parent_put()
1564 * If an image has a non-zero parent overlap, get a reference to its
1567 * Returns true if the rbd device has a parent with a non-zero
1575 if (!rbd_dev->parent_spec) in rbd_dev_parent_get()
1578 if (rbd_dev->parent_overlap) in rbd_dev_parent_get()
1579 counter = atomic_inc_return_safe(&rbd_dev->parent_ref); in rbd_dev_parent_get()
1593 img_request->rbd_dev = rbd_dev; in rbd_img_request_init()
1594 img_request->op_type = op_type; in rbd_img_request_init()
1596 INIT_LIST_HEAD(&img_request->lock_item); in rbd_img_request_init()
1597 INIT_LIST_HEAD(&img_request->object_extents); in rbd_img_request_init()
1598 mutex_init(&img_request->state_mutex); in rbd_img_request_init()
1608 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_img_capture_header()
1610 lockdep_assert_held(&rbd_dev->header_rwsem); in rbd_img_capture_header()
1613 img_req->snap_id = rbd_dev->spec->snap_id; in rbd_img_capture_header()
1624 dout("%s: img %p\n", __func__, img_request); in rbd_img_request_destroy()
1626 WARN_ON(!list_empty(&img_request->lock_item)); in rbd_img_request_destroy()
1631 rbd_dev_parent_put(img_request->rbd_dev); in rbd_img_request_destroy()
1634 ceph_put_snap_context(img_request->snapc); in rbd_img_request_destroy()
1636 if (test_bit(IMG_REQ_CHILD, &img_request->flags)) in rbd_img_request_destroy()
1640 #define BITS_PER_OBJ 2
1642 #define OBJ_MASK ((1 << BITS_PER_OBJ) - 1)
1649 rbd_assert(objno < rbd_dev->object_map_size); in __rbd_object_map_index()
1651 *shift = (OBJS_PER_BYTE - off - 1) * BITS_PER_OBJ; in __rbd_object_map_index()
1659 lockdep_assert_held(&rbd_dev->object_map_lock); in __rbd_object_map_get()
1661 return (rbd_dev->object_map[index] >> shift) & OBJ_MASK; in __rbd_object_map_get()
1670 lockdep_assert_held(&rbd_dev->object_map_lock); in __rbd_object_map_set()
1674 p = &rbd_dev->object_map[index]; in __rbd_object_map_set()
1682 spin_lock(&rbd_dev->object_map_lock); in rbd_object_map_get()
1684 spin_unlock(&rbd_dev->object_map_lock); in rbd_object_map_get()
1691 * An image mapped read-only can't use the object map -- it isn't in use_object_map()
1701 return ((rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) && in use_object_map()
1702 !(rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID)); in use_object_map()
1709 /* fall back to default logic if object map is disabled or invalid */ in rbd_object_map_may_exist()
1722 rbd_dev->spec->image_id); in rbd_object_map_name()
1725 rbd_dev->spec->image_id, snap_id); in rbd_object_map_name()
1730 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_object_map_lock()
1742 ret = ceph_cls_lock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME, in rbd_object_map_lock()
1744 if (ret != -EBUSY || broke_lock) { in rbd_object_map_lock()
1745 if (ret == -EEXIST) in rbd_object_map_lock()
1752 ret = ceph_cls_lock_info(osdc, &oid, &rbd_dev->header_oloc, in rbd_object_map_lock()
1756 if (ret == -ENOENT) in rbd_object_map_lock()
1770 ret = ceph_cls_break_lock(osdc, &oid, &rbd_dev->header_oloc, in rbd_object_map_lock()
1775 if (ret == -ENOENT) in rbd_object_map_lock()
1788 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_object_map_unlock()
1794 ret = ceph_cls_unlock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME, in rbd_object_map_unlock()
1796 if (ret && ret != -ENOENT) in rbd_object_map_unlock()
1822 return -EINVAL; in decode_object_map_header()
1827 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_object_map_load()
1838 rbd_assert(!rbd_dev->object_map && !rbd_dev->object_map_size); in __rbd_object_map_load()
1840 num_objects = ceph_get_num_objects(&rbd_dev->layout, in __rbd_object_map_load()
1841 rbd_dev->mapping.size); in __rbd_object_map_load()
1850 rbd_object_map_name(rbd_dev, rbd_dev->spec->snap_id, &oid); in __rbd_object_map_load()
1851 ret = ceph_osdc_call(osdc, &oid, &rbd_dev->header_oloc, in __rbd_object_map_load()
1866 ret = -EINVAL; in __rbd_object_map_load()
1871 ret = -EINVAL; in __rbd_object_map_load()
1875 rbd_dev->object_map = kvmalloc(object_map_bytes, GFP_KERNEL); in __rbd_object_map_load()
1876 if (!rbd_dev->object_map) { in __rbd_object_map_load()
1877 ret = -ENOMEM; in __rbd_object_map_load()
1881 rbd_dev->object_map_size = object_map_size; in __rbd_object_map_load()
1882 ceph_copy_from_page_vector(pages, rbd_dev->object_map, in __rbd_object_map_load()
1892 kvfree(rbd_dev->object_map); in rbd_object_map_free()
1893 rbd_dev->object_map = NULL; in rbd_object_map_free()
1894 rbd_dev->object_map_size = 0; in rbd_object_map_free()
1911 if (rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID) in rbd_object_map_load()
1953 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_object_map_update_finish()
1960 if (osd_req->r_result) in rbd_object_map_update_finish()
1961 return osd_req->r_result; in rbd_object_map_update_finish()
1966 if (osd_req->r_num_ops == 1) in rbd_object_map_update_finish()
1970 * Update in-memory HEAD object map. in rbd_object_map_update_finish()
1972 rbd_assert(osd_req->r_num_ops == 2); in rbd_object_map_update_finish()
1974 rbd_assert(osd_data->type == CEPH_OSD_DATA_TYPE_PAGES); in rbd_object_map_update_finish()
1976 p = page_address(osd_data->pages[0]); in rbd_object_map_update_finish()
1978 rbd_assert(objno == obj_req->ex.oe_objno); in rbd_object_map_update_finish()
1985 spin_lock(&rbd_dev->object_map_lock); in rbd_object_map_update_finish()
1990 spin_unlock(&rbd_dev->object_map_lock); in rbd_object_map_update_finish()
1997 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_object_map_callback()
2000 dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req, in rbd_object_map_callback()
2001 osd_req->r_result, obj_req); in rbd_object_map_callback()
2046 osd_req_op_cls_request_data_pages(req, which, pages, p - start, 0, in rbd_cls_object_map_update()
2053 * 0 - object map update sent
2054 * 1 - object map update isn't needed
2055 * <0 - error
2060 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_object_map_update()
2061 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_object_map_update()
2068 if (!update_needed(rbd_dev, obj_req->ex.oe_objno, new_state)) in rbd_object_map_update()
2076 return -ENOMEM; in rbd_object_map_update()
2078 list_add_tail(&req->r_private_item, &obj_req->osd_reqs); in rbd_object_map_update()
2079 req->r_callback = rbd_object_map_callback; in rbd_object_map_update()
2080 req->r_priv = obj_req; in rbd_object_map_update()
2082 rbd_object_map_name(rbd_dev, snap_id, &req->r_base_oid); in rbd_object_map_update()
2083 ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc); in rbd_object_map_update()
2084 req->r_flags = CEPH_OSD_FLAG_WRITE; in rbd_object_map_update()
2085 ktime_get_real_ts64(&req->r_mtime); in rbd_object_map_update()
2098 ret = rbd_cls_object_map_update(req, which, obj_req->ex.oe_objno, in rbd_object_map_update()
2117 while (cnt && img_extents[cnt - 1].fe_off >= overlap) in prune_extents()
2118 cnt--; in prune_extents()
2121 struct ceph_file_extent *ex = &img_extents[cnt - 1]; in prune_extents()
2124 if (ex->fe_off + ex->fe_len > overlap) in prune_extents()
2125 ex->fe_len = overlap - ex->fe_off; in prune_extents()
2138 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_calc_img_extents()
2141 if (!rbd_dev->parent_overlap) in rbd_obj_calc_img_extents()
2144 ret = ceph_extent_to_file(&rbd_dev->layout, obj_req->ex.oe_objno, in rbd_obj_calc_img_extents()
2145 entire ? 0 : obj_req->ex.oe_off, in rbd_obj_calc_img_extents()
2146 entire ? rbd_dev->layout.object_size : in rbd_obj_calc_img_extents()
2147 obj_req->ex.oe_len, in rbd_obj_calc_img_extents()
2148 &obj_req->img_extents, in rbd_obj_calc_img_extents()
2149 &obj_req->num_img_extents); in rbd_obj_calc_img_extents()
2153 prune_extents(obj_req->img_extents, &obj_req->num_img_extents, in rbd_obj_calc_img_extents()
2154 rbd_dev->parent_overlap); in rbd_obj_calc_img_extents()
2160 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_osd_setup_data()
2162 switch (obj_req->img_request->data_type) { in rbd_osd_setup_data()
2165 &obj_req->bio_pos, in rbd_osd_setup_data()
2166 obj_req->ex.oe_len); in rbd_osd_setup_data()
2170 rbd_assert(obj_req->bvec_pos.iter.bi_size == in rbd_osd_setup_data()
2171 obj_req->ex.oe_len); in rbd_osd_setup_data()
2172 rbd_assert(obj_req->bvec_idx == obj_req->bvec_count); in rbd_osd_setup_data()
2174 &obj_req->bvec_pos); in rbd_osd_setup_data()
2176 default: in rbd_osd_setup_data()
2207 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_osd_setup_copyup()
2214 osd_req_op_cls_request_data_bvecs(osd_req, which, obj_req->copyup_bvecs, in rbd_osd_setup_copyup()
2215 obj_req->copyup_bvec_count, bytes); in rbd_osd_setup_copyup()
2221 obj_req->read_state = RBD_OBJ_READ_START; in rbd_obj_init_read()
2228 struct rbd_obj_request *obj_req = osd_req->r_priv; in __rbd_osd_setup_write_ops()
2229 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in __rbd_osd_setup_write_ops()
2233 !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST)) { in __rbd_osd_setup_write_ops()
2235 rbd_dev->layout.object_size, in __rbd_osd_setup_write_ops()
2236 rbd_dev->layout.object_size, in __rbd_osd_setup_write_ops()
2237 rbd_dev->opts->alloc_hint_flags); in __rbd_osd_setup_write_ops()
2246 obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0); in __rbd_osd_setup_write_ops()
2259 obj_req->write_state = RBD_OBJ_WRITE_START; in rbd_obj_init_write()
2272 struct rbd_obj_request *obj_req = osd_req->r_priv; in __rbd_osd_setup_discard_ops()
2274 if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents) { in __rbd_osd_setup_discard_ops()
2275 rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION); in __rbd_osd_setup_discard_ops()
2280 obj_req->ex.oe_off, obj_req->ex.oe_len, in __rbd_osd_setup_discard_ops()
2287 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_init_discard()
2299 if (rbd_dev->opts->alloc_size != rbd_dev->layout.object_size || in rbd_obj_init_discard()
2301 off = round_up(obj_req->ex.oe_off, rbd_dev->opts->alloc_size); in rbd_obj_init_discard()
2302 next_off = round_down(obj_req->ex.oe_off + obj_req->ex.oe_len, in rbd_obj_init_discard()
2303 rbd_dev->opts->alloc_size); in rbd_obj_init_discard()
2307 dout("%s %p %llu~%llu -> %llu~%llu\n", __func__, in rbd_obj_init_discard()
2308 obj_req, obj_req->ex.oe_off, obj_req->ex.oe_len, in rbd_obj_init_discard()
2309 off, next_off - off); in rbd_obj_init_discard()
2310 obj_req->ex.oe_off = off; in rbd_obj_init_discard()
2311 obj_req->ex.oe_len = next_off - off; in rbd_obj_init_discard()
2319 obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT; in rbd_obj_init_discard()
2320 if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents) in rbd_obj_init_discard()
2321 obj_req->flags |= RBD_OBJ_FLAG_DELETION; in rbd_obj_init_discard()
2323 obj_req->write_state = RBD_OBJ_WRITE_START; in rbd_obj_init_discard()
2330 struct rbd_obj_request *obj_req = osd_req->r_priv; in __rbd_osd_setup_zeroout_ops()
2334 if (obj_req->num_img_extents) { in __rbd_osd_setup_zeroout_ops()
2335 if (!(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED)) in __rbd_osd_setup_zeroout_ops()
2340 rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION); in __rbd_osd_setup_zeroout_ops()
2351 obj_req->ex.oe_off, obj_req->ex.oe_len, in __rbd_osd_setup_zeroout_ops()
2364 if (!obj_req->num_img_extents) { in rbd_obj_init_zeroout()
2365 obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT; in rbd_obj_init_zeroout()
2367 obj_req->flags |= RBD_OBJ_FLAG_DELETION; in rbd_obj_init_zeroout()
2370 obj_req->write_state = RBD_OBJ_WRITE_START; in rbd_obj_init_zeroout()
2376 struct rbd_img_request *img_req = obj_req->img_request; in count_write_ops()
2378 switch (img_req->op_type) { in count_write_ops()
2380 if (!use_object_map(img_req->rbd_dev) || in count_write_ops()
2381 !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST)) in count_write_ops()
2382 return 2; /* setallochint + write/writefull */ in count_write_ops()
2388 if (rbd_obj_is_entire(obj_req) && obj_req->num_img_extents && in count_write_ops()
2389 !(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED)) in count_write_ops()
2390 return 2; /* create + truncate */ in count_write_ops()
2393 default: in count_write_ops()
2401 struct rbd_obj_request *obj_req = osd_req->r_priv; in rbd_osd_setup_write_ops()
2403 switch (obj_req->img_request->op_type) { in rbd_osd_setup_write_ops()
2413 default: in rbd_osd_setup_write_ops()
2429 switch (img_req->op_type) { in __rbd_img_fill_request()
2442 default: in __rbd_img_fill_request()
2453 img_req->state = RBD_IMG_START; in __rbd_img_fill_request()
2481 return &obj_req->ex; in alloc_object_extent()
2487 * because ->set_pos_fn() should be called only once per object.
2493 return l->stripe_unit != l->object_size; in rbd_layout_is_fancy()
2504 img_req->data_type = fctx->pos_type; in rbd_img_fill_request_nocopy()
2510 fctx->iter = *fctx->pos; in rbd_img_fill_request_nocopy()
2512 ret = ceph_file_to_extents(&img_req->rbd_dev->layout, in rbd_img_fill_request_nocopy()
2515 &img_req->object_extents, in rbd_img_fill_request_nocopy()
2517 fctx->set_pos_fn, &fctx->iter); in rbd_img_fill_request_nocopy()
2530 * @fctx->pos data buffer.
2534 * different chunks of @fctx->pos data buffer.
2536 * @fctx->pos data buffer is assumed to be large enough.
2543 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_img_fill_request()
2548 if (fctx->pos_type == OBJ_REQUEST_NODATA || in rbd_img_fill_request()
2549 !rbd_layout_is_fancy(&rbd_dev->layout)) in rbd_img_fill_request()
2553 img_req->data_type = OBJ_REQUEST_OWN_BVECS; in rbd_img_fill_request()
2556 * Create object requests and determine ->bvec_count for each object in rbd_img_fill_request()
2557 * request. Note that ->bvec_count sum over all object requests may in rbd_img_fill_request()
2562 fctx->iter = *fctx->pos; in rbd_img_fill_request()
2564 ret = ceph_file_to_extents(&rbd_dev->layout, in rbd_img_fill_request()
2567 &img_req->object_extents, in rbd_img_fill_request()
2569 fctx->count_fn, &fctx->iter); in rbd_img_fill_request()
2575 obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count, in rbd_img_fill_request()
2576 sizeof(*obj_req->bvec_pos.bvecs), in rbd_img_fill_request()
2578 if (!obj_req->bvec_pos.bvecs) in rbd_img_fill_request()
2579 return -ENOMEM; in rbd_img_fill_request()
2586 fctx->iter = *fctx->pos; in rbd_img_fill_request()
2588 ret = ceph_iterate_extents(&rbd_dev->layout, in rbd_img_fill_request()
2591 &img_req->object_extents, in rbd_img_fill_request()
2592 fctx->copy_fn, &fctx->iter); in rbd_img_fill_request()
2619 dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); in set_bio_pos()
2620 obj_req->bio_pos = *it; in set_bio_pos()
2630 dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); in count_bio_bvecs()
2632 obj_req->bvec_count++; in count_bio_bvecs()
2643 dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); in copy_bio_bvecs()
2645 obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv; in copy_bio_bvecs()
2646 obj_req->bvec_pos.iter.bi_size += bv.bv_len; in copy_bio_bvecs()
2671 struct ceph_bio_iter it = { .bio = bio, .iter = bio->bi_iter }; in rbd_img_fill_from_bio()
2682 obj_req->bvec_pos = *it; in set_bvec_pos()
2683 ceph_bvec_iter_shorten(&obj_req->bvec_pos, bytes); in set_bvec_pos()
2694 obj_req->bvec_count++; in count_bvecs()
2705 obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv; in copy_bvecs()
2706 obj_req->bvec_pos.iter.bi_size += bv.bv_len; in copy_bvecs()
2747 rbd_img_handle_request(img_req, img_req->work_result); in rbd_img_handle_request_work()
2752 INIT_WORK(&img_req->work, rbd_img_handle_request_work); in rbd_img_schedule()
2753 img_req->work_result = result; in rbd_img_schedule()
2754 queue_work(rbd_wq, &img_req->work); in rbd_img_schedule()
2759 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_may_exist()
2761 if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno)) { in rbd_obj_may_exist()
2762 obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST; in rbd_obj_may_exist()
2766 dout("%s %p objno %llu assuming dne\n", __func__, obj_req, in rbd_obj_may_exist()
2767 obj_req->ex.oe_objno); in rbd_obj_may_exist()
2781 obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0); in rbd_obj_read_object()
2795 struct rbd_img_request *img_req = obj_req->img_request; in rbd_obj_read_from_parent()
2796 struct rbd_device *parent = img_req->rbd_dev->parent; in rbd_obj_read_from_parent()
2802 return -ENOMEM; in rbd_obj_read_from_parent()
2805 __set_bit(IMG_REQ_CHILD, &child_img_req->flags); in rbd_obj_read_from_parent()
2806 child_img_req->obj_request = obj_req; in rbd_obj_read_from_parent()
2808 down_read(&parent->header_rwsem); in rbd_obj_read_from_parent()
2810 up_read(&parent->header_rwsem); in rbd_obj_read_from_parent()
2812 dout("%s child_img_req %p for obj_req %p\n", __func__, child_img_req, in rbd_obj_read_from_parent()
2816 switch (img_req->data_type) { in rbd_obj_read_from_parent()
2819 obj_req->img_extents, in rbd_obj_read_from_parent()
2820 obj_req->num_img_extents, in rbd_obj_read_from_parent()
2821 &obj_req->bio_pos); in rbd_obj_read_from_parent()
2826 obj_req->img_extents, in rbd_obj_read_from_parent()
2827 obj_req->num_img_extents, in rbd_obj_read_from_parent()
2828 &obj_req->bvec_pos); in rbd_obj_read_from_parent()
2830 default: in rbd_obj_read_from_parent()
2835 obj_req->img_extents, in rbd_obj_read_from_parent()
2836 obj_req->num_img_extents, in rbd_obj_read_from_parent()
2837 obj_req->copyup_bvecs); in rbd_obj_read_from_parent()
2851 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_advance_read()
2855 switch (obj_req->read_state) { in rbd_obj_advance_read()
2860 *result = -ENOENT; in rbd_obj_advance_read()
2861 obj_req->read_state = RBD_OBJ_READ_OBJECT; in rbd_obj_advance_read()
2870 obj_req->read_state = RBD_OBJ_READ_OBJECT; in rbd_obj_advance_read()
2873 if (*result == -ENOENT && rbd_dev->parent_overlap) { in rbd_obj_advance_read()
2880 if (obj_req->num_img_extents) { in rbd_obj_advance_read()
2886 obj_req->read_state = RBD_OBJ_READ_PARENT; in rbd_obj_advance_read()
2892 * -ENOENT means a hole in the image -- zero-fill the entire in rbd_obj_advance_read()
2893 * length of the request. A short read also implies zero-fill in rbd_obj_advance_read()
2896 if (*result == -ENOENT) { in rbd_obj_advance_read()
2897 rbd_obj_zero_range(obj_req, 0, obj_req->ex.oe_len); in rbd_obj_advance_read()
2900 if (*result < obj_req->ex.oe_len) in rbd_obj_advance_read()
2902 obj_req->ex.oe_len - *result); in rbd_obj_advance_read()
2904 rbd_assert(*result == obj_req->ex.oe_len); in rbd_obj_advance_read()
2910 * The parent image is read only up to the overlap -- zero-fill in rbd_obj_advance_read()
2916 if (obj_overlap < obj_req->ex.oe_len) in rbd_obj_advance_read()
2918 obj_req->ex.oe_len - obj_overlap); in rbd_obj_advance_read()
2921 default: in rbd_obj_advance_read()
2928 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_write_is_noop()
2930 if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno)) in rbd_obj_write_is_noop()
2931 obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST; in rbd_obj_write_is_noop()
2933 if (!(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST) && in rbd_obj_write_is_noop()
2934 (obj_req->flags & RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT)) { in rbd_obj_write_is_noop()
2935 dout("%s %p noop for nonexistent\n", __func__, obj_req); in rbd_obj_write_is_noop()
2944 * 0 - object map update sent
2945 * 1 - object map update isn't needed
2946 * <0 - error
2950 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_write_pre_object_map()
2953 if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) in rbd_obj_write_pre_object_map()
2956 if (obj_req->flags & RBD_OBJ_FLAG_DELETION) in rbd_obj_write_pre_object_map()
2971 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) in rbd_obj_write_object()
2978 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) { in rbd_obj_write_object()
3020 dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes); in rbd_obj_copyup_empty_snapc()
3049 dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes); in rbd_obj_copyup_current_snapc()
3079 rbd_assert(!obj_req->copyup_bvecs); in setup_copyup_bvecs()
3080 obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap); in setup_copyup_bvecs()
3081 obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count, in setup_copyup_bvecs()
3082 sizeof(*obj_req->copyup_bvecs), in setup_copyup_bvecs()
3084 if (!obj_req->copyup_bvecs) in setup_copyup_bvecs()
3085 return -ENOMEM; in setup_copyup_bvecs()
3087 for (i = 0; i < obj_req->copyup_bvec_count; i++) { in setup_copyup_bvecs()
3092 return -ENOMEM; in setup_copyup_bvecs()
3094 bvec_set_page(&obj_req->copyup_bvecs[i], page, len, 0); in setup_copyup_bvecs()
3095 obj_overlap -= len; in setup_copyup_bvecs()
3109 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_copyup_read_parent()
3112 rbd_assert(obj_req->num_img_extents); in rbd_obj_copyup_read_parent()
3113 prune_extents(obj_req->img_extents, &obj_req->num_img_extents, in rbd_obj_copyup_read_parent()
3114 rbd_dev->parent_overlap); in rbd_obj_copyup_read_parent()
3115 if (!obj_req->num_img_extents) { in rbd_obj_copyup_read_parent()
3118 * image has been flattened). Re-submit the original write in rbd_obj_copyup_read_parent()
3119 * request -- pass MODS_ONLY since the copyup isn't needed in rbd_obj_copyup_read_parent()
3134 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_copyup_object_maps()
3135 struct ceph_snap_context *snapc = obj_req->img_request->snapc; in rbd_obj_copyup_object_maps()
3140 rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending); in rbd_obj_copyup_object_maps()
3142 if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) in rbd_obj_copyup_object_maps()
3145 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS) in rbd_obj_copyup_object_maps()
3148 for (i = 0; i < snapc->num_snaps; i++) { in rbd_obj_copyup_object_maps()
3149 if ((rbd_dev->header.features & RBD_FEATURE_FAST_DIFF) && in rbd_obj_copyup_object_maps()
3150 i + 1 < snapc->num_snaps) in rbd_obj_copyup_object_maps()
3155 ret = rbd_object_map_update(obj_req, snapc->snaps[i], in rbd_obj_copyup_object_maps()
3158 obj_req->pending.result = ret; in rbd_obj_copyup_object_maps()
3163 obj_req->pending.num_pending++; in rbd_obj_copyup_object_maps()
3172 rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending); in rbd_obj_copyup_write_object()
3175 * Only send non-zero copyup data to save some I/O and network in rbd_obj_copyup_write_object()
3176 * bandwidth -- zero copyup data is equivalent to the object not in rbd_obj_copyup_write_object()
3179 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS) in rbd_obj_copyup_write_object()
3182 if (obj_req->img_request->snapc->num_snaps && bytes > 0) { in rbd_obj_copyup_write_object()
3185 * deep-copyup the object through all existing snapshots. in rbd_obj_copyup_write_object()
3191 obj_req->pending.result = ret; in rbd_obj_copyup_write_object()
3195 obj_req->pending.num_pending++; in rbd_obj_copyup_write_object()
3201 obj_req->pending.result = ret; in rbd_obj_copyup_write_object()
3205 obj_req->pending.num_pending++; in rbd_obj_copyup_write_object()
3210 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_advance_copyup()
3214 switch (obj_req->copyup_state) { in rbd_obj_advance_copyup()
3223 if (obj_req->num_img_extents) in rbd_obj_advance_copyup()
3224 obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT; in rbd_obj_advance_copyup()
3226 obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; in rbd_obj_advance_copyup()
3232 if (is_zero_bvecs(obj_req->copyup_bvecs, in rbd_obj_advance_copyup()
3234 dout("%s %p detected zeros\n", __func__, obj_req); in rbd_obj_advance_copyup()
3235 obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS; in rbd_obj_advance_copyup()
3239 if (!obj_req->pending.num_pending) { in rbd_obj_advance_copyup()
3240 *result = obj_req->pending.result; in rbd_obj_advance_copyup()
3241 obj_req->copyup_state = RBD_OBJ_COPYUP_OBJECT_MAPS; in rbd_obj_advance_copyup()
3244 obj_req->copyup_state = __RBD_OBJ_COPYUP_OBJECT_MAPS; in rbd_obj_advance_copyup()
3247 if (!pending_result_dec(&obj_req->pending, result)) in rbd_obj_advance_copyup()
3258 if (!obj_req->pending.num_pending) { in rbd_obj_advance_copyup()
3259 *result = obj_req->pending.result; in rbd_obj_advance_copyup()
3260 obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; in rbd_obj_advance_copyup()
3263 obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT; in rbd_obj_advance_copyup()
3266 if (!pending_result_dec(&obj_req->pending, result)) in rbd_obj_advance_copyup()
3271 default: in rbd_obj_advance_copyup()
3278 * 0 - object map update sent
3279 * 1 - object map update isn't needed
3280 * <0 - error
3284 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_write_post_object_map()
3287 if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) in rbd_obj_write_post_object_map()
3290 if (!(obj_req->flags & RBD_OBJ_FLAG_DELETION)) in rbd_obj_write_post_object_map()
3299 struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; in rbd_obj_advance_write()
3303 switch (obj_req->write_state) { in rbd_obj_advance_write()
3316 obj_req->write_state = RBD_OBJ_WRITE_PRE_OBJECT_MAP; in rbd_obj_advance_write()
3331 obj_req->write_state = RBD_OBJ_WRITE_OBJECT; in rbd_obj_advance_write()
3334 if (*result == -ENOENT) { in rbd_obj_advance_write()
3335 if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) { in rbd_obj_advance_write()
3337 obj_req->copyup_state = RBD_OBJ_COPYUP_START; in rbd_obj_advance_write()
3338 obj_req->write_state = __RBD_OBJ_WRITE_COPYUP; in rbd_obj_advance_write()
3342 * On a non-existent object: in rbd_obj_advance_write()
3343 * delete - -ENOENT, truncate/zero - 0 in rbd_obj_advance_write()
3345 if (obj_req->flags & RBD_OBJ_FLAG_DELETION) in rbd_obj_advance_write()
3351 obj_req->write_state = RBD_OBJ_WRITE_COPYUP; in rbd_obj_advance_write()
3367 obj_req->write_state = RBD_OBJ_WRITE_POST_OBJECT_MAP; in rbd_obj_advance_write()
3376 default: in rbd_obj_advance_write()
3387 struct rbd_img_request *img_req = obj_req->img_request; in __rbd_obj_handle_request()
3388 struct rbd_device *rbd_dev = img_req->rbd_dev; in __rbd_obj_handle_request()
3391 mutex_lock(&obj_req->state_mutex); in __rbd_obj_handle_request()
3396 mutex_unlock(&obj_req->state_mutex); in __rbd_obj_handle_request()
3401 obj_op_name(img_req->op_type), obj_req->ex.oe_objno, in __rbd_obj_handle_request()
3402 obj_req->ex.oe_off, obj_req->ex.oe_len, *result); in __rbd_obj_handle_request()
3408 * This is open-coded in rbd_img_handle_request() to avoid parent chain
3414 rbd_img_handle_request(obj_req->img_request, result); in rbd_obj_handle_request()
3419 struct rbd_device *rbd_dev = img_req->rbd_dev; in need_exclusive_lock()
3421 if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) in need_exclusive_lock()
3427 rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags)); in need_exclusive_lock()
3428 if (rbd_dev->opts->lock_on_read || in need_exclusive_lock()
3429 (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) in need_exclusive_lock()
3437 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_lock_add_request()
3440 lockdep_assert_held(&rbd_dev->lock_rwsem); in rbd_lock_add_request()
3441 locked = rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED; in rbd_lock_add_request()
3442 spin_lock(&rbd_dev->lock_lists_lock); in rbd_lock_add_request()
3443 rbd_assert(list_empty(&img_req->lock_item)); in rbd_lock_add_request()
3445 list_add_tail(&img_req->lock_item, &rbd_dev->acquiring_list); in rbd_lock_add_request()
3447 list_add_tail(&img_req->lock_item, &rbd_dev->running_list); in rbd_lock_add_request()
3448 spin_unlock(&rbd_dev->lock_lists_lock); in rbd_lock_add_request()
3454 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_lock_del_request()
3457 lockdep_assert_held(&rbd_dev->lock_rwsem); in rbd_lock_del_request()
3458 spin_lock(&rbd_dev->lock_lists_lock); in rbd_lock_del_request()
3459 if (!list_empty(&img_req->lock_item)) { in rbd_lock_del_request()
3460 list_del_init(&img_req->lock_item); in rbd_lock_del_request()
3461 need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && in rbd_lock_del_request()
3462 list_empty(&rbd_dev->running_list)); in rbd_lock_del_request()
3464 spin_unlock(&rbd_dev->lock_lists_lock); in rbd_lock_del_request()
3466 complete(&rbd_dev->releasing_wait); in rbd_lock_del_request()
3471 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_img_exclusive_lock()
3479 if (rbd_dev->opts->exclusive) { in rbd_img_exclusive_lock()
3481 return -EROFS; in rbd_img_exclusive_lock()
3488 dout("%s rbd_dev %p queueing lock_dwork\n", __func__, rbd_dev); in rbd_img_exclusive_lock()
3489 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); in rbd_img_exclusive_lock()
3495 struct rbd_device *rbd_dev = img_req->rbd_dev; in rbd_img_object_requests()
3498 rbd_assert(!img_req->pending.result && !img_req->pending.num_pending); in rbd_img_object_requests()
3503 rbd_assert(!img_req->snapc); in rbd_img_object_requests()
3504 down_read(&rbd_dev->header_rwsem); in rbd_img_object_requests()
3505 img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc); in rbd_img_object_requests()
3506 up_read(&rbd_dev->header_rwsem); in rbd_img_object_requests()
3514 img_req->pending.result = result; in rbd_img_object_requests()
3518 img_req->pending.num_pending++; in rbd_img_object_requests()
3528 switch (img_req->state) { in rbd_img_advance()
3537 img_req->state = RBD_IMG_EXCLUSIVE_LOCK; in rbd_img_advance()
3546 if (!img_req->pending.num_pending) { in rbd_img_advance()
3547 *result = img_req->pending.result; in rbd_img_advance()
3548 img_req->state = RBD_IMG_OBJECT_REQUESTS; in rbd_img_advance()
3551 img_req->state = __RBD_IMG_OBJECT_REQUESTS; in rbd_img_advance()
3554 if (!pending_result_dec(&img_req->pending, result)) in rbd_img_advance()
3559 default: in rbd_img_advance()
3570 struct rbd_device *rbd_dev = img_req->rbd_dev; in __rbd_img_handle_request()
3574 down_read(&rbd_dev->lock_rwsem); in __rbd_img_handle_request()
3575 mutex_lock(&img_req->state_mutex); in __rbd_img_handle_request()
3579 mutex_unlock(&img_req->state_mutex); in __rbd_img_handle_request()
3580 up_read(&rbd_dev->lock_rwsem); in __rbd_img_handle_request()
3582 mutex_lock(&img_req->state_mutex); in __rbd_img_handle_request()
3584 mutex_unlock(&img_req->state_mutex); in __rbd_img_handle_request()
3590 test_bit(IMG_REQ_CHILD, &img_req->flags) ? "child " : "", in __rbd_img_handle_request()
3591 obj_op_name(img_req->op_type), *result); in __rbd_img_handle_request()
3602 if (test_bit(IMG_REQ_CHILD, &img_req->flags)) { in rbd_img_handle_request()
3603 struct rbd_obj_request *obj_req = img_req->obj_request; in rbd_img_handle_request()
3607 img_req = obj_req->img_request; in rbd_img_handle_request()
3623 return lhs->gid == rhs->gid && lhs->handle == rhs->handle; in rbd_cid_equal()
3630 mutex_lock(&rbd_dev->watch_mutex); in rbd_get_cid()
3631 cid.gid = ceph_client_gid(rbd_dev->rbd_client->client); in rbd_get_cid()
3632 cid.handle = rbd_dev->watch_cookie; in rbd_get_cid()
3633 mutex_unlock(&rbd_dev->watch_mutex); in rbd_get_cid()
3643 dout("%s rbd_dev %p %llu-%llu -> %llu-%llu\n", __func__, rbd_dev, in rbd_set_owner_cid()
3644 rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle, in rbd_set_owner_cid()
3645 cid->gid, cid->handle); in rbd_set_owner_cid()
3646 rbd_dev->owner_cid = *cid; /* struct */ in rbd_set_owner_cid()
3651 mutex_lock(&rbd_dev->watch_mutex); in format_lock_cookie()
3652 sprintf(buf, "%s %llu", RBD_LOCK_COOKIE_PREFIX, rbd_dev->watch_cookie); in format_lock_cookie()
3653 mutex_unlock(&rbd_dev->watch_mutex); in format_lock_cookie()
3660 rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED; in __rbd_lock()
3661 strcpy(rbd_dev->lock_cookie, cookie); in __rbd_lock()
3663 queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work); in __rbd_lock()
3671 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_lock()
3676 rbd_dev->lock_cookie[0] != '\0'); in rbd_lock()
3679 ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, in rbd_lock()
3682 if (ret && ret != -EEXIST) in rbd_lock()
3694 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_unlock()
3698 rbd_dev->lock_cookie[0] == '\0'); in rbd_unlock()
3700 ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, in rbd_unlock()
3701 RBD_LOCK_NAME, rbd_dev->lock_cookie); in rbd_unlock()
3702 if (ret && ret != -ENOENT) in rbd_unlock()
3706 rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED; in rbd_unlock()
3707 rbd_dev->lock_cookie[0] = '\0'; in rbd_unlock()
3709 queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work); in rbd_unlock()
3717 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_notify_op_lock()
3723 dout("%s rbd_dev %p notify_op %d\n", __func__, rbd_dev, notify_op); in __rbd_notify_op_lock()
3726 ceph_start_encoding(&p, 2, 1, buf_size - CEPH_ENCODING_START_BLK_LEN); in __rbd_notify_op_lock()
3731 return ceph_osdc_notify(osdc, &rbd_dev->header_oid, in __rbd_notify_op_lock()
3732 &rbd_dev->header_oloc, buf, buf_size, in __rbd_notify_op_lock()
3765 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_request_lock()
3769 if (ret && ret != -ETIMEDOUT) { in rbd_request_lock()
3780 while (n--) { in rbd_request_lock()
3794 ret = -EIO; in rbd_request_lock()
3814 ret = -ETIMEDOUT; in rbd_request_lock()
3822 ret = -EINVAL; in rbd_request_lock()
3834 dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result); in wake_lock_waiters()
3835 lockdep_assert_held_write(&rbd_dev->lock_rwsem); in wake_lock_waiters()
3837 cancel_delayed_work(&rbd_dev->lock_dwork); in wake_lock_waiters()
3838 if (!completion_done(&rbd_dev->acquire_wait)) { in wake_lock_waiters()
3839 rbd_assert(list_empty(&rbd_dev->acquiring_list) && in wake_lock_waiters()
3840 list_empty(&rbd_dev->running_list)); in wake_lock_waiters()
3841 rbd_dev->acquire_err = result; in wake_lock_waiters()
3842 complete_all(&rbd_dev->acquire_wait); in wake_lock_waiters()
3846 while (!list_empty(&rbd_dev->acquiring_list)) { in wake_lock_waiters()
3847 img_req = list_first_entry(&rbd_dev->acquiring_list, in wake_lock_waiters()
3849 mutex_lock(&img_req->state_mutex); in wake_lock_waiters()
3850 rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK); in wake_lock_waiters()
3852 list_move_tail(&img_req->lock_item, in wake_lock_waiters()
3853 &rbd_dev->running_list); in wake_lock_waiters()
3855 list_del_init(&img_req->lock_item); in wake_lock_waiters()
3857 mutex_unlock(&img_req->state_mutex); in wake_lock_waiters()
3864 return lhs->id.name.type == rhs->id.name.type && in locker_equal()
3865 lhs->id.name.num == rhs->id.name.num && in locker_equal()
3866 !strcmp(lhs->id.cookie, rhs->id.cookie) && in locker_equal()
3867 ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr); in locker_equal()
3878 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in get_lock_owner_info()
3886 ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid, in get_lock_owner_info()
3887 &rbd_dev->header_oloc, RBD_LOCK_NAME, in get_lock_owner_info()
3895 dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev); in get_lock_owner_info()
3924 dout("%s rbd_dev %p got locker %s%llu@%pISpc/%u handle %llu\n", in get_lock_owner_info()
3936 return ERR_PTR(-EBUSY); in get_lock_owner_info()
3942 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in find_watcher()
3949 ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid, in find_watcher()
3950 &rbd_dev->header_oloc, &watchers, in find_watcher()
3957 sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie); in find_watcher()
3960 * Ignore addr->type while comparing. This mimics in find_watcher()
3964 &locker->info.addr) && in find_watcher()
3971 dout("%s rbd_dev %p found cid %llu-%llu\n", __func__, in find_watcher()
3979 dout("%s rbd_dev %p no watchers\n", __func__, rbd_dev); in find_watcher()
3991 struct ceph_client *client = rbd_dev->rbd_client->client; in rbd_try_lock()
4001 if (ret != -EBUSY) { in rbd_try_lock()
4031 ENTITY_NAME(locker->id.name)); in rbd_try_lock()
4033 ret = ceph_monc_blocklist_add(&client->monc, in rbd_try_lock()
4034 &locker->info.addr); in rbd_try_lock()
4037 ENTITY_NAME(locker->id.name), ret); in rbd_try_lock()
4041 ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid, in rbd_try_lock()
4042 &rbd_dev->header_oloc, RBD_LOCK_NAME, in rbd_try_lock()
4043 locker->id.cookie, &locker->id.name); in rbd_try_lock()
4044 if (ret && ret != -ENOENT) { in rbd_try_lock()
4069 if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) { in rbd_post_acquire_action()
4080 * 0 - lock acquired
4081 * 1 - caller should call rbd_request_lock()
4082 * <0 - error
4088 down_read(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4089 dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev, in rbd_try_acquire_lock()
4090 rbd_dev->lock_state); in rbd_try_acquire_lock()
4092 up_read(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4096 up_read(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4097 down_write(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4098 dout("%s rbd_dev %p write lock_state %d\n", __func__, rbd_dev, in rbd_try_acquire_lock()
4099 rbd_dev->lock_state); in rbd_try_acquire_lock()
4101 up_write(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4111 up_write(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4115 rbd_assert(rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED); in rbd_try_acquire_lock()
4116 rbd_assert(list_empty(&rbd_dev->running_list)); in rbd_try_acquire_lock()
4120 rbd_warn(rbd_dev, "post-acquire action failed: %d", ret); in rbd_try_acquire_lock()
4131 up_write(&rbd_dev->lock_rwsem); in rbd_try_acquire_lock()
4141 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_acquire_lock()
4145 dout("%s rbd_dev %p ret %d - done\n", __func__, rbd_dev, ret); in rbd_acquire_lock()
4150 if (ret == -ETIMEDOUT) { in rbd_acquire_lock()
4152 } else if (ret == -EROFS) { in rbd_acquire_lock()
4154 down_write(&rbd_dev->lock_rwsem); in rbd_acquire_lock()
4156 up_write(&rbd_dev->lock_rwsem); in rbd_acquire_lock()
4159 mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, in rbd_acquire_lock()
4166 dout("%s rbd_dev %p requeuing lock_dwork\n", __func__, in rbd_acquire_lock()
4168 mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, in rbd_acquire_lock()
4169 msecs_to_jiffies(2 * RBD_NOTIFY_TIMEOUT * MSEC_PER_SEC)); in rbd_acquire_lock()
4175 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_quiesce_lock()
4176 lockdep_assert_held_write(&rbd_dev->lock_rwsem); in rbd_quiesce_lock()
4178 if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED) in rbd_quiesce_lock()
4182 * Ensure that all in-flight IO is flushed. in rbd_quiesce_lock()
4184 rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; in rbd_quiesce_lock()
4185 rbd_assert(!completion_done(&rbd_dev->releasing_wait)); in rbd_quiesce_lock()
4186 if (list_empty(&rbd_dev->running_list)) in rbd_quiesce_lock()
4189 up_write(&rbd_dev->lock_rwsem); in rbd_quiesce_lock()
4190 wait_for_completion(&rbd_dev->releasing_wait); in rbd_quiesce_lock()
4192 down_write(&rbd_dev->lock_rwsem); in rbd_quiesce_lock()
4193 if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) in rbd_quiesce_lock()
4196 rbd_assert(list_empty(&rbd_dev->running_list)); in rbd_quiesce_lock()
4202 if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) in rbd_pre_release_action()
4208 rbd_assert(list_empty(&rbd_dev->running_list)); in __rbd_release_lock()
4225 * Give others a chance to grab the lock - we would re-acquire in rbd_release_lock()
4231 cancel_delayed_work(&rbd_dev->lock_dwork); in rbd_release_lock()
4239 down_write(&rbd_dev->lock_rwsem); in rbd_release_lock_work()
4241 up_write(&rbd_dev->lock_rwsem); in rbd_release_lock_work()
4248 dout("%s rbd_dev %p\n", __func__, rbd_dev); in maybe_kick_acquire()
4252 spin_lock(&rbd_dev->lock_lists_lock); in maybe_kick_acquire()
4253 have_requests = !list_empty(&rbd_dev->acquiring_list); in maybe_kick_acquire()
4254 spin_unlock(&rbd_dev->lock_lists_lock); in maybe_kick_acquire()
4255 if (have_requests || delayed_work_pending(&rbd_dev->lock_dwork)) { in maybe_kick_acquire()
4256 dout("%s rbd_dev %p kicking lock_dwork\n", __func__, rbd_dev); in maybe_kick_acquire()
4257 mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); in maybe_kick_acquire()
4266 if (struct_v >= 2) { in rbd_handle_acquired_lock()
4271 dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid, in rbd_handle_acquired_lock()
4274 down_write(&rbd_dev->lock_rwsem); in rbd_handle_acquired_lock()
4275 if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { in rbd_handle_acquired_lock()
4276 dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n", in rbd_handle_acquired_lock()
4281 downgrade_write(&rbd_dev->lock_rwsem); in rbd_handle_acquired_lock()
4283 down_read(&rbd_dev->lock_rwsem); in rbd_handle_acquired_lock()
4287 up_read(&rbd_dev->lock_rwsem); in rbd_handle_acquired_lock()
4295 if (struct_v >= 2) { in rbd_handle_released_lock()
4300 dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid, in rbd_handle_released_lock()
4303 down_write(&rbd_dev->lock_rwsem); in rbd_handle_released_lock()
4304 if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { in rbd_handle_released_lock()
4305 dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n", in rbd_handle_released_lock()
4307 rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle); in rbd_handle_released_lock()
4311 downgrade_write(&rbd_dev->lock_rwsem); in rbd_handle_released_lock()
4313 down_read(&rbd_dev->lock_rwsem); in rbd_handle_released_lock()
4317 up_read(&rbd_dev->lock_rwsem); in rbd_handle_released_lock()
4331 if (struct_v >= 2) { in rbd_handle_request_lock()
4336 dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid, in rbd_handle_request_lock()
4341 down_read(&rbd_dev->lock_rwsem); in rbd_handle_request_lock()
4343 if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED && in rbd_handle_request_lock()
4344 rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid)) in rbd_handle_request_lock()
4353 if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) { in rbd_handle_request_lock()
4354 if (!rbd_dev->opts->exclusive) { in rbd_handle_request_lock()
4355 dout("%s rbd_dev %p queueing unlock_work\n", in rbd_handle_request_lock()
4357 queue_work(rbd_dev->task_wq, in rbd_handle_request_lock()
4358 &rbd_dev->unlock_work); in rbd_handle_request_lock()
4361 result = -EROFS; in rbd_handle_request_lock()
4367 up_read(&rbd_dev->lock_rwsem); in rbd_handle_request_lock()
4374 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_acknowledge_notify()
4384 buf_size - CEPH_ENCODING_START_BLK_LEN); in __rbd_acknowledge_notify()
4390 ret = ceph_osdc_notify_ack(osdc, &rbd_dev->header_oid, in __rbd_acknowledge_notify()
4391 &rbd_dev->header_oloc, notify_id, cookie, in __rbd_acknowledge_notify()
4400 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_acknowledge_notify()
4407 dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result); in rbd_acknowledge_notify_result()
4422 dout("%s rbd_dev %p cookie %llu notify_id %llu data_len %zu\n", in rbd_watch_cb()
4440 dout("%s rbd_dev %p notify_op %u\n", __func__, rbd_dev, notify_op); in rbd_watch_cb()
4465 default: in rbd_watch_cb()
4468 cookie, -EOPNOTSUPP); in rbd_watch_cb()
4483 down_write(&rbd_dev->lock_rwsem); in rbd_watch_errcb()
4485 up_write(&rbd_dev->lock_rwsem); in rbd_watch_errcb()
4487 mutex_lock(&rbd_dev->watch_mutex); in rbd_watch_errcb()
4488 if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) { in rbd_watch_errcb()
4490 rbd_dev->watch_state = RBD_WATCH_STATE_ERROR; in rbd_watch_errcb()
4492 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, 0); in rbd_watch_errcb()
4494 mutex_unlock(&rbd_dev->watch_mutex); in rbd_watch_errcb()
4502 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_register_watch()
4505 rbd_assert(!rbd_dev->watch_handle); in __rbd_register_watch()
4506 dout("%s rbd_dev %p\n", __func__, rbd_dev); in __rbd_register_watch()
4508 handle = ceph_osdc_watch(osdc, &rbd_dev->header_oid, in __rbd_register_watch()
4509 &rbd_dev->header_oloc, rbd_watch_cb, in __rbd_register_watch()
4514 rbd_dev->watch_handle = handle; in __rbd_register_watch()
4523 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __rbd_unregister_watch()
4526 rbd_assert(rbd_dev->watch_handle); in __rbd_unregister_watch()
4527 dout("%s rbd_dev %p\n", __func__, rbd_dev); in __rbd_unregister_watch()
4529 ret = ceph_osdc_unwatch(osdc, rbd_dev->watch_handle); in __rbd_unregister_watch()
4533 rbd_dev->watch_handle = NULL; in __rbd_unregister_watch()
4540 mutex_lock(&rbd_dev->watch_mutex); in rbd_register_watch()
4541 rbd_assert(rbd_dev->watch_state == RBD_WATCH_STATE_UNREGISTERED); in rbd_register_watch()
4546 rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED; in rbd_register_watch()
4547 rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id; in rbd_register_watch()
4550 mutex_unlock(&rbd_dev->watch_mutex); in rbd_register_watch()
4556 dout("%s rbd_dev %p\n", __func__, rbd_dev); in cancel_tasks_sync()
4558 cancel_work_sync(&rbd_dev->acquired_lock_work); in cancel_tasks_sync()
4559 cancel_work_sync(&rbd_dev->released_lock_work); in cancel_tasks_sync()
4560 cancel_delayed_work_sync(&rbd_dev->lock_dwork); in cancel_tasks_sync()
4561 cancel_work_sync(&rbd_dev->unlock_work); in cancel_tasks_sync()
4572 mutex_lock(&rbd_dev->watch_mutex); in rbd_unregister_watch()
4573 if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) in rbd_unregister_watch()
4575 rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED; in rbd_unregister_watch()
4576 mutex_unlock(&rbd_dev->watch_mutex); in rbd_unregister_watch()
4578 cancel_delayed_work_sync(&rbd_dev->watch_dwork); in rbd_unregister_watch()
4579 ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); in rbd_unregister_watch()
4587 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_reacquire_lock()
4595 ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid, in rbd_reacquire_lock()
4596 &rbd_dev->header_oloc, RBD_LOCK_NAME, in rbd_reacquire_lock()
4597 CEPH_CLS_LOCK_EXCLUSIVE, rbd_dev->lock_cookie, in rbd_reacquire_lock()
4600 if (ret != -EOPNOTSUPP) in rbd_reacquire_lock()
4609 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); in rbd_reacquire_lock()
4622 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_reregister_watch()
4624 mutex_lock(&rbd_dev->watch_mutex); in rbd_reregister_watch()
4625 if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) { in rbd_reregister_watch()
4626 mutex_unlock(&rbd_dev->watch_mutex); in rbd_reregister_watch()
4633 if (ret != -EBLOCKLISTED && ret != -ENOENT) { in rbd_reregister_watch()
4634 queue_delayed_work(rbd_dev->task_wq, in rbd_reregister_watch()
4635 &rbd_dev->watch_dwork, in rbd_reregister_watch()
4637 mutex_unlock(&rbd_dev->watch_mutex); in rbd_reregister_watch()
4641 mutex_unlock(&rbd_dev->watch_mutex); in rbd_reregister_watch()
4642 down_write(&rbd_dev->lock_rwsem); in rbd_reregister_watch()
4644 up_write(&rbd_dev->lock_rwsem); in rbd_reregister_watch()
4648 rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED; in rbd_reregister_watch()
4649 rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id; in rbd_reregister_watch()
4650 mutex_unlock(&rbd_dev->watch_mutex); in rbd_reregister_watch()
4652 down_write(&rbd_dev->lock_rwsem); in rbd_reregister_watch()
4653 if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) in rbd_reregister_watch()
4655 up_write(&rbd_dev->lock_rwsem); in rbd_reregister_watch()
4675 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_obj_method_sync()
4683 * also supply outbound data--parameters for the object in rbd_obj_method_sync()
4689 return -E2BIG; in rbd_obj_method_sync()
4693 return -ENOMEM; in rbd_obj_method_sync()
4702 return -ENOMEM; in rbd_obj_method_sync()
4723 struct rbd_device *rbd_dev = img_request->rbd_dev; in rbd_queue_workfn()
4724 enum obj_operation_type op_type = img_request->op_type; in rbd_queue_workfn()
4731 /* Ignore/skip any zero-length requests */ in rbd_queue_workfn()
4733 dout("%s: zero-length request\n", __func__); in rbd_queue_workfn()
4740 down_read(&rbd_dev->header_rwsem); in rbd_queue_workfn()
4741 mapping_size = rbd_dev->mapping.size; in rbd_queue_workfn()
4743 up_read(&rbd_dev->header_rwsem); in rbd_queue_workfn()
4748 result = -EIO; in rbd_queue_workfn()
4752 dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev, in rbd_queue_workfn()
4759 rq->bio); in rbd_queue_workfn()
4777 struct rbd_device *rbd_dev = hctx->queue->queuedata; in rbd_queue_rq()
4778 struct rbd_img_request *img_req = blk_mq_rq_to_pdu(bd->rq); in rbd_queue_rq()
4781 switch (req_op(bd->rq)) { in rbd_queue_rq()
4794 default: in rbd_queue_rq()
4795 rbd_warn(rbd_dev, "unknown req_op %d", req_op(bd->rq)); in rbd_queue_rq()
4803 rbd_warn(rbd_dev, "%s on read-only mapping", in rbd_queue_rq()
4804 obj_op_name(img_req->op_type)); in rbd_queue_rq()
4810 INIT_WORK(&img_req->work, rbd_queue_workfn); in rbd_queue_rq()
4811 queue_work(rbd_wq, &img_req->work); in rbd_queue_rq()
4817 put_disk(rbd_dev->disk); in rbd_free_disk()
4818 blk_mq_free_tag_set(&rbd_dev->tag_set); in rbd_free_disk()
4819 rbd_dev->disk = NULL; in rbd_free_disk()
4828 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_obj_read_sync()
4836 return -ENOMEM; in rbd_obj_read_sync()
4838 ceph_oid_copy(&req->r_base_oid, oid); in rbd_obj_read_sync()
4839 ceph_oloc_copy(&req->r_base_oloc, oloc); in rbd_obj_read_sync()
4840 req->r_flags = CEPH_OSD_FLAG_READ; in rbd_obj_read_sync()
4868 * return, the rbd_dev->header field will contain up-to-date
4882 * The complete header will include an array of its 64-bit in rbd_dev_v1_header_info()
4884 * a contiguous block of NUL-terminated strings. Note that in rbd_dev_v1_header_info()
4886 * it in, in which case we re-read it. in rbd_dev_v1_header_info()
4898 return -ENOMEM; in rbd_dev_v1_header_info()
4900 ret = rbd_obj_read_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v1_header_info()
4901 &rbd_dev->header_oloc, ondisk, size); in rbd_dev_v1_header_info()
4905 ret = -ENXIO; in rbd_dev_v1_header_info()
4911 ret = -ENXIO; in rbd_dev_v1_header_info()
4916 names_size = le64_to_cpu(ondisk->snap_names_len); in rbd_dev_v1_header_info()
4918 snap_count = le32_to_cpu(ondisk->snap_count); in rbd_dev_v1_header_info()
4933 * If EXISTS is not set, rbd_dev->disk may be NULL, so don't in rbd_dev_update_size()
4937 if (test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags) && in rbd_dev_update_size()
4938 !test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) { in rbd_dev_update_size()
4939 size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; in rbd_dev_update_size()
4940 dout("setting size to %llu sectors", (unsigned long long)size); in rbd_dev_update_size()
4941 set_capacity_and_notify(rbd_dev->disk, size); in rbd_dev_update_size()
4954 rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; in rbd_init_disk()
4957 memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); in rbd_init_disk()
4958 rbd_dev->tag_set.ops = &rbd_mq_ops; in rbd_init_disk()
4959 rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth; in rbd_init_disk()
4960 rbd_dev->tag_set.numa_node = NUMA_NO_NODE; in rbd_init_disk()
4961 rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; in rbd_init_disk()
4962 rbd_dev->tag_set.nr_hw_queues = num_present_cpus(); in rbd_init_disk()
4963 rbd_dev->tag_set.cmd_size = sizeof(struct rbd_img_request); in rbd_init_disk()
4965 err = blk_mq_alloc_tag_set(&rbd_dev->tag_set); in rbd_init_disk()
4969 disk = blk_mq_alloc_disk(&rbd_dev->tag_set, rbd_dev); in rbd_init_disk()
4974 q = disk->queue; in rbd_init_disk()
4976 snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", in rbd_init_disk()
4977 rbd_dev->dev_id); in rbd_init_disk()
4978 disk->major = rbd_dev->major; in rbd_init_disk()
4979 disk->first_minor = rbd_dev->minor; in rbd_init_disk()
4981 disk->minors = (1 << RBD_SINGLE_MAJOR_PART_SHIFT); in rbd_init_disk()
4983 disk->minors = RBD_MINORS_PER_MAJOR; in rbd_init_disk()
4984 disk->fops = &rbd_bd_ops; in rbd_init_disk()
4985 disk->private_data = rbd_dev; in rbd_init_disk()
4988 /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ in rbd_init_disk()
4991 q->limits.max_sectors = queue_max_hw_sectors(q); in rbd_init_disk()
4994 blk_queue_io_min(q, rbd_dev->opts->alloc_size); in rbd_init_disk()
4995 blk_queue_io_opt(q, rbd_dev->opts->alloc_size); in rbd_init_disk()
4997 if (rbd_dev->opts->trim) { in rbd_init_disk()
4998 q->limits.discard_granularity = rbd_dev->opts->alloc_size; in rbd_init_disk()
5003 if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) in rbd_init_disk()
5006 rbd_dev->disk = disk; in rbd_init_disk()
5010 blk_mq_free_tag_set(&rbd_dev->tag_set); in rbd_init_disk()
5029 (unsigned long long)rbd_dev->mapping.size); in rbd_size_show()
5037 return sprintf(buf, "0x%016llx\n", rbd_dev->header.features); in rbd_features_show()
5045 if (rbd_dev->major) in rbd_major_show()
5046 return sprintf(buf, "%d\n", rbd_dev->major); in rbd_major_show()
5056 return sprintf(buf, "%d\n", rbd_dev->minor); in rbd_minor_show()
5064 ceph_client_addr(rbd_dev->rbd_client->client); in rbd_client_addr_show()
5066 return sprintf(buf, "%pISpc/%u\n", &client_addr->in_addr, in rbd_client_addr_show()
5067 le32_to_cpu(client_addr->nonce)); in rbd_client_addr_show()
5076 ceph_client_gid(rbd_dev->rbd_client->client)); in rbd_client_id_show()
5084 return sprintf(buf, "%pU\n", &rbd_dev->rbd_client->client->fsid); in rbd_cluster_fsid_show()
5093 return -EPERM; in rbd_config_info_show()
5095 return sprintf(buf, "%s\n", rbd_dev->config_info); in rbd_config_info_show()
5103 return sprintf(buf, "%s\n", rbd_dev->spec->pool_name); in rbd_pool_show()
5112 (unsigned long long) rbd_dev->spec->pool_id); in rbd_pool_id_show()
5120 return sprintf(buf, "%s\n", rbd_dev->spec->pool_ns ?: ""); in rbd_pool_ns_show()
5128 if (rbd_dev->spec->image_name) in rbd_name_show()
5129 return sprintf(buf, "%s\n", rbd_dev->spec->image_name); in rbd_name_show()
5139 return sprintf(buf, "%s\n", rbd_dev->spec->image_id); in rbd_image_id_show()
5143 * Shows the name of the currently-mapped snapshot (or
5152 return sprintf(buf, "%s\n", rbd_dev->spec->snap_name); in rbd_snap_show()
5160 return sprintf(buf, "%llu\n", rbd_dev->spec->snap_id); in rbd_snap_id_show()
5175 if (!rbd_dev->parent) in rbd_parent_show()
5178 for ( ; rbd_dev->parent; rbd_dev = rbd_dev->parent) { in rbd_parent_show()
5179 struct rbd_spec *spec = rbd_dev->parent_spec; in rbd_parent_show()
5188 spec->pool_id, spec->pool_name, in rbd_parent_show()
5189 spec->pool_ns ?: "", in rbd_parent_show()
5190 spec->image_id, spec->image_name ?: "(unknown)", in rbd_parent_show()
5191 spec->snap_id, spec->snap_name, in rbd_parent_show()
5192 rbd_dev->parent_overlap); in rbd_parent_show()
5207 return -EPERM; in rbd_image_refresh()
5274 kref_get(&spec->kref); in rbd_spec_get()
5283 kref_put(&spec->kref, rbd_spec_free); in rbd_spec_put()
5294 spec->pool_id = CEPH_NOPOOL; in rbd_spec_alloc()
5295 spec->snap_id = CEPH_NOSNAP; in rbd_spec_alloc()
5296 kref_init(&spec->kref); in rbd_spec_alloc()
5305 kfree(spec->pool_name); in rbd_spec_free()
5306 kfree(spec->pool_ns); in rbd_spec_free()
5307 kfree(spec->image_id); in rbd_spec_free()
5308 kfree(spec->image_name); in rbd_spec_free()
5309 kfree(spec->snap_name); in rbd_spec_free()
5315 WARN_ON(rbd_dev->watch_state != RBD_WATCH_STATE_UNREGISTERED); in rbd_dev_free()
5316 WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_UNLOCKED); in rbd_dev_free()
5318 ceph_oid_destroy(&rbd_dev->header_oid); in rbd_dev_free()
5319 ceph_oloc_destroy(&rbd_dev->header_oloc); in rbd_dev_free()
5320 kfree(rbd_dev->config_info); in rbd_dev_free()
5322 rbd_put_client(rbd_dev->rbd_client); in rbd_dev_free()
5323 rbd_spec_put(rbd_dev->spec); in rbd_dev_free()
5324 kfree(rbd_dev->opts); in rbd_dev_free()
5331 bool need_put = !!rbd_dev->opts; in rbd_dev_release()
5334 destroy_workqueue(rbd_dev->task_wq); in rbd_dev_release()
5335 ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id); in rbd_dev_release()
5343 * doing something similar to dm (dm-builtin.c) is overkill. in rbd_dev_release()
5357 spin_lock_init(&rbd_dev->lock); in __rbd_dev_create()
5358 INIT_LIST_HEAD(&rbd_dev->node); in __rbd_dev_create()
5359 init_rwsem(&rbd_dev->header_rwsem); in __rbd_dev_create()
5361 rbd_dev->header.data_pool_id = CEPH_NOPOOL; in __rbd_dev_create()
5362 ceph_oid_init(&rbd_dev->header_oid); in __rbd_dev_create()
5363 rbd_dev->header_oloc.pool = spec->pool_id; in __rbd_dev_create()
5364 if (spec->pool_ns) { in __rbd_dev_create()
5365 WARN_ON(!*spec->pool_ns); in __rbd_dev_create()
5366 rbd_dev->header_oloc.pool_ns = in __rbd_dev_create()
5367 ceph_find_or_create_string(spec->pool_ns, in __rbd_dev_create()
5368 strlen(spec->pool_ns)); in __rbd_dev_create()
5371 mutex_init(&rbd_dev->watch_mutex); in __rbd_dev_create()
5372 rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED; in __rbd_dev_create()
5373 INIT_DELAYED_WORK(&rbd_dev->watch_dwork, rbd_reregister_watch); in __rbd_dev_create()
5375 init_rwsem(&rbd_dev->lock_rwsem); in __rbd_dev_create()
5376 rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED; in __rbd_dev_create()
5377 INIT_WORK(&rbd_dev->acquired_lock_work, rbd_notify_acquired_lock); in __rbd_dev_create()
5378 INIT_WORK(&rbd_dev->released_lock_work, rbd_notify_released_lock); in __rbd_dev_create()
5379 INIT_DELAYED_WORK(&rbd_dev->lock_dwork, rbd_acquire_lock); in __rbd_dev_create()
5380 INIT_WORK(&rbd_dev->unlock_work, rbd_release_lock_work); in __rbd_dev_create()
5381 spin_lock_init(&rbd_dev->lock_lists_lock); in __rbd_dev_create()
5382 INIT_LIST_HEAD(&rbd_dev->acquiring_list); in __rbd_dev_create()
5383 INIT_LIST_HEAD(&rbd_dev->running_list); in __rbd_dev_create()
5384 init_completion(&rbd_dev->acquire_wait); in __rbd_dev_create()
5385 init_completion(&rbd_dev->releasing_wait); in __rbd_dev_create()
5387 spin_lock_init(&rbd_dev->object_map_lock); in __rbd_dev_create()
5389 rbd_dev->dev.bus = &rbd_bus_type; in __rbd_dev_create()
5390 rbd_dev->dev.type = &rbd_device_type; in __rbd_dev_create()
5391 rbd_dev->dev.parent = &rbd_root_dev; in __rbd_dev_create()
5392 device_initialize(&rbd_dev->dev); in __rbd_dev_create()
5411 rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0, in rbd_dev_create()
5414 if (rbd_dev->dev_id < 0) in rbd_dev_create()
5417 sprintf(rbd_dev->name, RBD_DRV_NAME "%d", rbd_dev->dev_id); in rbd_dev_create()
5418 rbd_dev->task_wq = alloc_ordered_workqueue("%s-tasks", WQ_MEM_RECLAIM, in rbd_dev_create()
5419 rbd_dev->name); in rbd_dev_create()
5420 if (!rbd_dev->task_wq) in rbd_dev_create()
5426 rbd_dev->rbd_client = rbdc; in rbd_dev_create()
5427 rbd_dev->spec = spec; in rbd_dev_create()
5428 rbd_dev->opts = opts; in rbd_dev_create()
5430 dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id); in rbd_dev_create()
5434 ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id); in rbd_dev_create()
5443 put_device(&rbd_dev->dev); in rbd_dev_destroy()
5461 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in _rbd_dev_v2_snap_size()
5462 &rbd_dev->header_oloc, "get_size", in _rbd_dev_v2_snap_size()
5465 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in _rbd_dev_v2_snap_size()
5469 return -ERANGE; in _rbd_dev_v2_snap_size()
5473 dout(" order %u", (unsigned int)*order); in _rbd_dev_v2_snap_size()
5477 dout(" snap_id 0x%016llx snap_size = %llu\n", in _rbd_dev_v2_snap_size()
5497 return -ENOMEM; in rbd_dev_v2_object_prefix()
5499 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_object_prefix()
5500 &rbd_dev->header_oloc, "get_object_prefix", in rbd_dev_v2_object_prefix()
5502 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_v2_object_prefix()
5516 dout(" object_prefix = %s\n", object_prefix); in rbd_dev_v2_object_prefix()
5540 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in _rbd_dev_v2_snap_features()
5541 &rbd_dev->header_oloc, "get_features", in _rbd_dev_v2_snap_features()
5544 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in _rbd_dev_v2_snap_features()
5548 return -ERANGE; in _rbd_dev_v2_snap_features()
5554 return -ENXIO; in _rbd_dev_v2_snap_features()
5559 dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", in _rbd_dev_v2_snap_features()
5569 * object map, store them in rbd_dev->object_map_flags.
5576 __le64 snapid = cpu_to_le64(rbd_dev->spec->snap_id); in rbd_dev_v2_get_flags()
5580 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_get_flags()
5581 &rbd_dev->header_oloc, "get_flags", in rbd_dev_v2_get_flags()
5587 return -EBADMSG; in rbd_dev_v2_get_flags()
5589 rbd_dev->object_map_flags = le64_to_cpu(flags); in rbd_dev_v2_get_flags()
5605 kfree(pii->pool_ns); in rbd_parent_info_cleanup()
5606 kfree(pii->image_id); in rbd_parent_info_cleanup()
5626 ceph_decode_64_safe(p, end, pii->pool_id, e_inval); in decode_parent_image_spec()
5627 pii->pool_ns = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL); in decode_parent_image_spec()
5628 if (IS_ERR(pii->pool_ns)) { in decode_parent_image_spec()
5629 ret = PTR_ERR(pii->pool_ns); in decode_parent_image_spec()
5630 pii->pool_ns = NULL; in decode_parent_image_spec()
5633 pii->image_id = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL); in decode_parent_image_spec()
5634 if (IS_ERR(pii->image_id)) { in decode_parent_image_spec()
5635 ret = PTR_ERR(pii->image_id); in decode_parent_image_spec()
5636 pii->image_id = NULL; in decode_parent_image_spec()
5639 ceph_decode_64_safe(p, end, pii->snap_id, e_inval); in decode_parent_image_spec()
5643 return -EINVAL; in decode_parent_image_spec()
5651 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __get_parent_info()
5656 ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, in __get_parent_info()
5660 return ret == -EOPNOTSUPP ? 1 : ret; in __get_parent_info()
5668 ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, in __get_parent_info()
5676 ceph_decode_8_safe(&p, end, pii->has_overlap, e_inval); in __get_parent_info()
5677 if (pii->has_overlap) in __get_parent_info()
5678 ceph_decode_64_safe(&p, end, pii->overlap, e_inval); in __get_parent_info()
5680 dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", in __get_parent_info()
5681 __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, in __get_parent_info()
5682 pii->has_overlap, pii->overlap); in __get_parent_info()
5686 return -EINVAL; in __get_parent_info()
5697 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in __get_parent_info_legacy()
5702 ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, in __get_parent_info_legacy()
5710 ceph_decode_64_safe(&p, end, pii->pool_id, e_inval); in __get_parent_info_legacy()
5711 pii->image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); in __get_parent_info_legacy()
5712 if (IS_ERR(pii->image_id)) { in __get_parent_info_legacy()
5713 ret = PTR_ERR(pii->image_id); in __get_parent_info_legacy()
5714 pii->image_id = NULL; in __get_parent_info_legacy()
5717 ceph_decode_64_safe(&p, end, pii->snap_id, e_inval); in __get_parent_info_legacy()
5718 pii->has_overlap = true; in __get_parent_info_legacy()
5719 ceph_decode_64_safe(&p, end, pii->overlap, e_inval); in __get_parent_info_legacy()
5721 dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", in __get_parent_info_legacy()
5722 __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, in __get_parent_info_legacy()
5723 pii->has_overlap, pii->overlap); in __get_parent_info_legacy()
5727 return -EINVAL; in __get_parent_info_legacy()
5739 return -ENOMEM; in rbd_dev_v2_parent_info()
5744 return -ENOMEM; in rbd_dev_v2_parent_info()
5748 ceph_encode_64(&p, rbd_dev->spec->snap_id); in rbd_dev_v2_parent_info()
5767 return -ENOMEM; in rbd_dev_setup_parent()
5778 ret = -EIO; in rbd_dev_setup_parent()
5789 parent_spec->pool_id = pii.pool_id; in rbd_dev_setup_parent()
5791 parent_spec->pool_ns = pii.pool_ns; in rbd_dev_setup_parent()
5794 parent_spec->image_id = pii.image_id; in rbd_dev_setup_parent()
5796 parent_spec->snap_id = pii.snap_id; in rbd_dev_setup_parent()
5798 rbd_assert(!rbd_dev->parent_spec); in rbd_dev_setup_parent()
5799 rbd_dev->parent_spec = parent_spec; in rbd_dev_setup_parent()
5808 rbd_dev->parent_overlap = pii.overlap; in rbd_dev_setup_parent()
5828 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_striping_info()
5829 &rbd_dev->header_oloc, "get_stripe_unit_count", in rbd_dev_v2_striping_info()
5831 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_v2_striping_info()
5835 return -ERANGE; in rbd_dev_v2_striping_info()
5839 dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit, in rbd_dev_v2_striping_info()
5850 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_data_pool()
5851 &rbd_dev->header_oloc, "get_data_pool", in rbd_dev_v2_data_pool()
5854 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_v2_data_pool()
5858 return -EBADMSG; in rbd_dev_v2_data_pool()
5861 dout(" data_pool_id = %lld\n", *data_pool_id); in rbd_dev_v2_data_pool()
5880 rbd_assert(!rbd_dev->spec->image_name); in rbd_dev_image_name()
5882 len = strlen(rbd_dev->spec->image_id); in rbd_dev_image_name()
5890 ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32)len); in rbd_dev_image_name()
5898 ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, in rbd_dev_image_name()
5910 dout("%s: name is %s len is %zd\n", __func__, image_name, len); in rbd_dev_image_name()
5920 struct ceph_snap_context *snapc = rbd_dev->header.snapc; in rbd_v1_snap_id_by_name()
5926 snap_name = rbd_dev->header.snap_names; in rbd_v1_snap_id_by_name()
5927 while (which < snapc->num_snaps) { in rbd_v1_snap_id_by_name()
5929 return snapc->snaps[which]; in rbd_v1_snap_id_by_name()
5938 struct ceph_snap_context *snapc = rbd_dev->header.snapc; in rbd_v2_snap_id_by_name()
5943 for (which = 0; !found && which < snapc->num_snaps; which++) { in rbd_v2_snap_id_by_name()
5946 snap_id = snapc->snaps[which]; in rbd_v2_snap_id_by_name()
5949 /* ignore no-longer existing snapshots */ in rbd_v2_snap_id_by_name()
5950 if (PTR_ERR(snap_name) == -ENOENT) in rbd_v2_snap_id_by_name()
5967 if (rbd_dev->image_format == 1) in rbd_snap_id_by_name()
5978 struct rbd_spec *spec = rbd_dev->spec; in rbd_spec_fill_snap_id()
5980 rbd_assert(spec->pool_id != CEPH_NOPOOL && spec->pool_name); in rbd_spec_fill_snap_id()
5981 rbd_assert(spec->image_id && spec->image_name); in rbd_spec_fill_snap_id()
5982 rbd_assert(spec->snap_name); in rbd_spec_fill_snap_id()
5984 if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) { in rbd_spec_fill_snap_id()
5987 snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name); in rbd_spec_fill_snap_id()
5989 return -ENOENT; in rbd_spec_fill_snap_id()
5991 spec->snap_id = snap_id; in rbd_spec_fill_snap_id()
5993 spec->snap_id = CEPH_NOSNAP; in rbd_spec_fill_snap_id()
6007 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; in rbd_spec_fill_names()
6008 struct rbd_spec *spec = rbd_dev->spec; in rbd_spec_fill_names()
6014 rbd_assert(spec->pool_id != CEPH_NOPOOL); in rbd_spec_fill_names()
6015 rbd_assert(spec->image_id); in rbd_spec_fill_names()
6016 rbd_assert(spec->snap_id != CEPH_NOSNAP); in rbd_spec_fill_names()
6020 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, spec->pool_id); in rbd_spec_fill_names()
6022 rbd_warn(rbd_dev, "no pool with id %llu", spec->pool_id); in rbd_spec_fill_names()
6023 return -EIO; in rbd_spec_fill_names()
6027 return -ENOMEM; in rbd_spec_fill_names()
6037 snap_name = rbd_snap_name(rbd_dev, spec->snap_id); in rbd_spec_fill_names()
6043 spec->pool_name = pool_name; in rbd_spec_fill_names()
6044 spec->image_name = image_name; in rbd_spec_fill_names()
6045 spec->snap_name = snap_name; in rbd_spec_fill_names()
6078 return -ENOMEM; in rbd_dev_v2_snap_context()
6080 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_snap_context()
6081 &rbd_dev->header_oloc, "get_snapcontext", in rbd_dev_v2_snap_context()
6083 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_v2_snap_context()
6089 ret = -ERANGE; in rbd_dev_v2_snap_context()
6099 if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context)) in rbd_dev_v2_snap_context()
6101 ret = -EINVAL; in rbd_dev_v2_snap_context()
6110 ret = -ENOMEM; in rbd_dev_v2_snap_context()
6113 snapc->seq = seq; in rbd_dev_v2_snap_context()
6115 snapc->snaps[i] = ceph_decode_64(&p); in rbd_dev_v2_snap_context()
6118 dout(" snap context seq = %llu, snap_count = %u\n", in rbd_dev_v2_snap_context()
6140 return ERR_PTR(-ENOMEM); in rbd_dev_v2_snap_name()
6143 ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, in rbd_dev_v2_snap_name()
6144 &rbd_dev->header_oloc, "get_snapshot_name", in rbd_dev_v2_snap_name()
6146 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_v2_snap_name()
6158 dout(" snap_id 0x%016llx snap_name = %s\n", in rbd_dev_v2_snap_name()
6173 first_time ? &header->obj_order : NULL, in rbd_dev_v2_header_info()
6174 &header->image_size); in rbd_dev_v2_header_info()
6184 ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc); in rbd_dev_v2_header_info()
6195 rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); in rbd_dev_header_info()
6196 rbd_assert(!header->object_prefix && !header->snapc); in rbd_dev_header_info()
6198 if (rbd_dev->image_format == 1) in rbd_dev_header_info()
6206 * first found non-space character (if any). Returns the length of
6207 * the token (string of non-white space characters) found. Note
6227 * that a duplicate buffer is created even for a zero-length token.
6229 * Returns a pointer to the newly-allocated duplicate, or a null
6231 * the lenp argument is a non-null pointer, the length of the token
6260 struct rbd_options *opt = pctx->opts; in rbd_parse_param()
6265 ret = ceph_parse_param(param, pctx->copts, NULL); in rbd_parse_param()
6266 if (ret != -ENOPARAM) in rbd_parse_param()
6270 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); in rbd_parse_param()
6272 if (token == -ENOPARAM) in rbd_parse_param()
6274 param->key); in rbd_parse_param()
6282 opt->queue_depth = result.uint_32; in rbd_parse_param()
6288 return inval_plog(&log, "alloc_size must be a power of 2"); in rbd_parse_param()
6289 opt->alloc_size = result.uint_32; in rbd_parse_param()
6295 opt->lock_timeout = msecs_to_jiffies(result.uint_32 * 1000); in rbd_parse_param()
6298 kfree(pctx->spec->pool_ns); in rbd_parse_param()
6299 pctx->spec->pool_ns = param->string; in rbd_parse_param()
6300 param->string = NULL; in rbd_parse_param()
6305 opt->alloc_hint_flags &= in rbd_parse_param()
6310 opt->alloc_hint_flags |= in rbd_parse_param()
6312 opt->alloc_hint_flags &= in rbd_parse_param()
6316 opt->alloc_hint_flags |= in rbd_parse_param()
6318 opt->alloc_hint_flags &= in rbd_parse_param()
6321 default: in rbd_parse_param()
6326 opt->read_only = true; in rbd_parse_param()
6329 opt->read_only = false; in rbd_parse_param()
6332 opt->lock_on_read = true; in rbd_parse_param()
6335 opt->exclusive = true; in rbd_parse_param()
6338 opt->trim = false; in rbd_parse_param()
6340 default: in rbd_parse_param()
6347 return inval_plog(&log, "%s out of range", param->key); in rbd_parse_param()
6359 dout("%s '%s'\n", __func__, options); in rbd_parse_options()
6377 return -ENOMEM; in rbd_parse_options()
6395 * and the data written is passed here via a NUL-terminated buffer.
6399 * the other parameters which return dynamically-allocated
6417 * A comma-separated list of one or more monitor addresses.
6422 * A comma-separated list of ceph and/or rbd options.
6431 * provided. Snapshot mappings are always read-only.
6451 return -EINVAL; in rbd_add_parse_args()
6457 ret = -EINVAL; in rbd_add_parse_args()
6460 return -ENOMEM; in rbd_add_parse_args()
6470 pctx.spec->pool_name = dup_token(&buf, NULL); in rbd_add_parse_args()
6471 if (!pctx.spec->pool_name) in rbd_add_parse_args()
6473 if (!*pctx.spec->pool_name) { in rbd_add_parse_args()
6478 pctx.spec->image_name = dup_token(&buf, NULL); in rbd_add_parse_args()
6479 if (!pctx.spec->image_name) in rbd_add_parse_args()
6481 if (!*pctx.spec->image_name) { in rbd_add_parse_args()
6487 * Snapshot name is optional; default is to use "-" in rbd_add_parse_args()
6493 len = sizeof (RBD_SNAP_HEAD_NAME) - 1; in rbd_add_parse_args()
6495 ret = -ENAMETOOLONG; in rbd_add_parse_args()
6502 pctx.spec->snap_name = snap_name; in rbd_add_parse_args()
6514 pctx.opts->read_only = RBD_READ_ONLY_DEFAULT; in rbd_add_parse_args()
6515 pctx.opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; in rbd_add_parse_args()
6516 pctx.opts->alloc_size = RBD_ALLOC_SIZE_DEFAULT; in rbd_add_parse_args()
6517 pctx.opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT; in rbd_add_parse_args()
6518 pctx.opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; in rbd_add_parse_args()
6519 pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT; in rbd_add_parse_args()
6520 pctx.opts->trim = RBD_TRIM_DEFAULT; in rbd_add_parse_args()
6538 ret = -ENOMEM; in rbd_add_parse_args()
6549 down_write(&rbd_dev->lock_rwsem); in rbd_dev_image_unlock()
6552 up_write(&rbd_dev->lock_rwsem); in rbd_dev_image_unlock()
6564 if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) { in rbd_add_acquire_lock()
6565 if (!rbd_dev->opts->exclusive && !rbd_dev->opts->lock_on_read) in rbd_add_acquire_lock()
6568 rbd_warn(rbd_dev, "exclusive-lock feature is not enabled"); in rbd_add_acquire_lock()
6569 return -EINVAL; in rbd_add_acquire_lock()
6576 queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); in rbd_add_acquire_lock()
6577 ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait, in rbd_add_acquire_lock()
6578 ceph_timeout_jiffies(rbd_dev->opts->lock_timeout)); in rbd_add_acquire_lock()
6580 ret = rbd_dev->acquire_err; in rbd_add_acquire_lock()
6582 cancel_delayed_work_sync(&rbd_dev->lock_dwork); in rbd_add_acquire_lock()
6584 ret = -ETIMEDOUT; in rbd_add_acquire_lock()
6595 rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev)); in rbd_add_acquire_lock()
6600 * An rbd format 2 image has a unique identifier, distinct from the
6627 if (rbd_dev->spec->image_id) { in rbd_dev_image_id()
6628 rbd_dev->image_format = *rbd_dev->spec->image_id ? 2 : 1; in rbd_dev_image_id()
6634 * First, see if the format 2 image id file exists, and if in rbd_dev_image_id()
6638 rbd_dev->spec->image_name); in rbd_dev_image_id()
6642 dout("rbd id object name is %s\n", oid.name); in rbd_dev_image_id()
6648 ret = -ENOMEM; in rbd_dev_image_id()
6654 ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, in rbd_dev_image_id()
6657 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); in rbd_dev_image_id()
6658 if (ret == -ENOENT) { in rbd_dev_image_id()
6660 ret = image_id ? 0 : -ENOMEM; in rbd_dev_image_id()
6662 rbd_dev->image_format = 1; in rbd_dev_image_id()
6670 rbd_dev->image_format = 2; in rbd_dev_image_id()
6674 rbd_dev->spec->image_id = image_id; in rbd_dev_image_id()
6675 dout("image_id is %s\n", image_id); in rbd_dev_image_id()
6695 rbd_image_header_cleanup(&rbd_dev->header); in rbd_dev_unprobe()
6703 ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix); in rbd_dev_v2_header_onetime()
6712 rbd_is_ro(rbd_dev), &header->features); in rbd_dev_v2_header_onetime()
6718 if (header->features & RBD_FEATURE_STRIPINGV2) { in rbd_dev_v2_header_onetime()
6719 ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit, in rbd_dev_v2_header_onetime()
6720 &header->stripe_count); in rbd_dev_v2_header_onetime()
6725 if (header->features & RBD_FEATURE_DATA_POOL) { in rbd_dev_v2_header_onetime()
6726 ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id); in rbd_dev_v2_header_onetime()
6735 * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
6744 if (!rbd_dev->parent_spec) in rbd_dev_probe_parent()
6749 ret = -EINVAL; in rbd_dev_probe_parent()
6753 parent = __rbd_dev_create(rbd_dev->parent_spec); in rbd_dev_probe_parent()
6755 ret = -ENOMEM; in rbd_dev_probe_parent()
6763 parent->rbd_client = __rbd_get_client(rbd_dev->rbd_client); in rbd_dev_probe_parent()
6764 parent->spec = rbd_spec_get(rbd_dev->parent_spec); in rbd_dev_probe_parent()
6766 __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags); in rbd_dev_probe_parent()
6772 rbd_dev->parent = parent; in rbd_dev_probe_parent()
6773 atomic_set(&rbd_dev->parent_ref, 1); in rbd_dev_probe_parent()
6784 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); in rbd_dev_device_release()
6787 unregister_blkdev(rbd_dev->major, rbd_dev->name); in rbd_dev_device_release()
6791 * rbd_dev->header_rwsem must be locked for write and will be unlocked
6801 ret = register_blkdev(0, rbd_dev->name); in rbd_dev_device_setup()
6805 rbd_dev->major = ret; in rbd_dev_device_setup()
6806 rbd_dev->minor = 0; in rbd_dev_device_setup()
6808 rbd_dev->major = rbd_major; in rbd_dev_device_setup()
6809 rbd_dev->minor = rbd_dev_id_to_minor(rbd_dev->dev_id); in rbd_dev_device_setup()
6818 set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); in rbd_dev_device_setup()
6819 set_disk_ro(rbd_dev->disk, rbd_is_ro(rbd_dev)); in rbd_dev_device_setup()
6821 ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id); in rbd_dev_device_setup()
6825 set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); in rbd_dev_device_setup()
6826 up_write(&rbd_dev->header_rwsem); in rbd_dev_device_setup()
6833 unregister_blkdev(rbd_dev->major, rbd_dev->name); in rbd_dev_device_setup()
6835 up_write(&rbd_dev->header_rwsem); in rbd_dev_device_setup()
6841 struct rbd_spec *spec = rbd_dev->spec; in rbd_dev_header_name()
6846 rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); in rbd_dev_header_name()
6847 if (rbd_dev->image_format == 1) in rbd_dev_header_name()
6848 ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s", in rbd_dev_header_name()
6849 spec->image_name, RBD_SUFFIX); in rbd_dev_header_name()
6851 ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s", in rbd_dev_header_name()
6852 RBD_HEADER_PREFIX, spec->image_id); in rbd_dev_header_name()
6861 rbd_dev->spec->pool_name, in rbd_print_dne()
6862 rbd_dev->spec->pool_ns ?: "", in rbd_print_dne()
6863 rbd_dev->spec->pool_ns ? "/" : "", in rbd_print_dne()
6864 rbd_dev->spec->image_name); in rbd_print_dne()
6867 rbd_dev->spec->pool_name, in rbd_print_dne()
6868 rbd_dev->spec->pool_ns ?: "", in rbd_print_dne()
6869 rbd_dev->spec->pool_ns ? "/" : "", in rbd_print_dne()
6870 rbd_dev->spec->image_name, in rbd_print_dne()
6871 rbd_dev->spec->snap_name); in rbd_print_dne()
6881 rbd_dev->image_format = 0; in rbd_dev_image_release()
6882 kfree(rbd_dev->spec->image_id); in rbd_dev_image_release()
6883 rbd_dev->spec->image_id = NULL; in rbd_dev_image_release()
6902 * error, rbd_dev->spec->image_id will be filled in with in rbd_dev_image_probe()
6903 * a dynamically-allocated string, and rbd_dev->image_format in rbd_dev_image_probe()
6904 * will be set to either 1 or 2. in rbd_dev_image_probe()
6917 if (ret == -ENOENT) in rbd_dev_image_probe()
6924 down_write(&rbd_dev->header_rwsem); in rbd_dev_image_probe()
6926 ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true); in rbd_dev_image_probe()
6928 if (ret == -ENOENT && !need_watch) in rbd_dev_image_probe()
6937 * id, image name and id, and snap name - need to fill snap id. in rbd_dev_image_probe()
6939 * and snap ids - need to fill in names for those ids. in rbd_dev_image_probe()
6946 if (ret == -ENOENT) in rbd_dev_image_probe()
6956 (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) { in rbd_dev_image_probe()
6962 if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { in rbd_dev_image_probe()
6972 dout("discovered format %u image, header name is %s\n", in rbd_dev_image_probe()
6973 rbd_dev->image_format, rbd_dev->header_oid.name); in rbd_dev_image_probe()
6978 up_write(&rbd_dev->header_rwsem); in rbd_dev_image_probe()
6983 rbd_dev->image_format = 0; in rbd_dev_image_probe()
6984 kfree(rbd_dev->spec->image_id); in rbd_dev_image_probe()
6985 rbd_dev->spec->image_id = NULL; in rbd_dev_image_probe()
6992 rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); in rbd_dev_update_header()
6993 rbd_assert(rbd_dev->header.object_prefix); /* !first_time */ in rbd_dev_update_header()
6995 if (rbd_dev->header.image_size != header->image_size) { in rbd_dev_update_header()
6996 rbd_dev->header.image_size = header->image_size; in rbd_dev_update_header()
6999 rbd_dev->mapping.size = header->image_size; in rbd_dev_update_header()
7004 ceph_put_snap_context(rbd_dev->header.snapc); in rbd_dev_update_header()
7005 rbd_dev->header.snapc = header->snapc; in rbd_dev_update_header()
7006 header->snapc = NULL; in rbd_dev_update_header()
7008 if (rbd_dev->image_format == 1) { in rbd_dev_update_header()
7009 kfree(rbd_dev->header.snap_names); in rbd_dev_update_header()
7010 rbd_dev->header.snap_names = header->snap_names; in rbd_dev_update_header()
7011 header->snap_names = NULL; in rbd_dev_update_header()
7013 kfree(rbd_dev->header.snap_sizes); in rbd_dev_update_header()
7014 rbd_dev->header.snap_sizes = header->snap_sizes; in rbd_dev_update_header()
7015 header->snap_sizes = NULL; in rbd_dev_update_header()
7022 if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) { in rbd_dev_update_parent()
7036 if (rbd_dev->parent_overlap) { in rbd_dev_update_parent()
7037 rbd_dev->parent_overlap = 0; in rbd_dev_update_parent()
7040 rbd_dev->disk->disk_name); in rbd_dev_update_parent()
7043 rbd_assert(rbd_dev->parent_spec); in rbd_dev_update_parent()
7049 if (!pii->overlap && rbd_dev->parent_overlap) in rbd_dev_update_parent()
7052 rbd_dev->parent_overlap = pii->overlap; in rbd_dev_update_parent()
7062 dout("%s rbd_dev %p\n", __func__, rbd_dev); in rbd_dev_refresh()
7072 if (rbd_dev->parent) { in rbd_dev_refresh()
7078 down_write(&rbd_dev->header_rwsem); in rbd_dev_refresh()
7080 if (rbd_dev->parent) in rbd_dev_refresh()
7082 up_write(&rbd_dev->header_rwsem); in rbd_dev_refresh()
7100 return -EPERM; in do_rbd_add()
7103 return -ENODEV; in do_rbd_add()
7117 rc = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, spec->pool_name); in do_rbd_add()
7119 if (rc == -ENOENT) in do_rbd_add()
7120 pr_info("pool %s does not exist\n", spec->pool_name); in do_rbd_add()
7123 spec->pool_id = (u64)rc; in do_rbd_add()
7127 rc = -ENOMEM; in do_rbd_add()
7134 /* if we are mapping a snapshot it will be a read-only mapping */ in do_rbd_add()
7135 if (rbd_dev->opts->read_only || in do_rbd_add()
7136 strcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME)) in do_rbd_add()
7137 __set_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags); in do_rbd_add()
7139 rbd_dev->config_info = kstrdup(buf, GFP_KERNEL); in do_rbd_add()
7140 if (!rbd_dev->config_info) { in do_rbd_add()
7141 rc = -ENOMEM; in do_rbd_add()
7149 if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) { in do_rbd_add()
7151 rbd_dev->layout.object_size); in do_rbd_add()
7152 rbd_dev->opts->alloc_size = rbd_dev->layout.object_size; in do_rbd_add()
7165 rc = device_add(&rbd_dev->dev); in do_rbd_add()
7169 rc = device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL); in do_rbd_add()
7174 list_add_tail(&rbd_dev->node, &rbd_dev_list); in do_rbd_add()
7177 pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name, in do_rbd_add()
7178 (unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT, in do_rbd_add()
7179 rbd_dev->header.features); in do_rbd_add()
7205 return -EINVAL; in add_store()
7218 while (rbd_dev->parent) { in rbd_dev_remove_parent()
7220 struct rbd_device *second = first->parent; in rbd_dev_remove_parent()
7227 while (second && (third = second->parent)) { in rbd_dev_remove_parent()
7234 first->parent = NULL; in rbd_dev_remove_parent()
7235 first->parent_overlap = 0; in rbd_dev_remove_parent()
7237 rbd_assert(first->parent_spec); in rbd_dev_remove_parent()
7238 rbd_spec_put(first->parent_spec); in rbd_dev_remove_parent()
7239 first->parent_spec = NULL; in rbd_dev_remove_parent()
7252 return -EPERM; in do_rbd_remove()
7254 dev_id = -1; in do_rbd_remove()
7259 return -EINVAL; in do_rbd_remove()
7266 return -EINVAL; in do_rbd_remove()
7270 ret = -ENOENT; in do_rbd_remove()
7273 if (rbd_dev->dev_id == dev_id) { in do_rbd_remove()
7279 spin_lock_irq(&rbd_dev->lock); in do_rbd_remove()
7280 if (rbd_dev->open_count && !force) in do_rbd_remove()
7281 ret = -EBUSY; in do_rbd_remove()
7283 &rbd_dev->flags)) in do_rbd_remove()
7284 ret = -EINPROGRESS; in do_rbd_remove()
7285 spin_unlock_irq(&rbd_dev->lock); in do_rbd_remove()
7296 blk_mq_freeze_queue(rbd_dev->disk->queue); in do_rbd_remove()
7297 blk_mark_disk_dead(rbd_dev->disk); in do_rbd_remove()
7300 del_gendisk(rbd_dev->disk); in do_rbd_remove()
7302 list_del_init(&rbd_dev->node); in do_rbd_remove()
7304 device_del(&rbd_dev->dev); in do_rbd_remove()
7316 return -EINVAL; in remove_store()
7359 return -ENOMEM; in rbd_slab_init()
7371 return -ENOMEM; in rbd_slab_init()
7391 return -EINVAL; in rbd_init()
7400 * rbd devices * queue depth, so leave @max_active at default. in rbd_init()
7404 rc = -ENOMEM; in rbd_init()