summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2010-12-26 22:37:05 -0800
committerDavid S. Miller <davem@davemloft.net>2010-12-26 22:37:05 -0800
commit17f7f4d9fcce8f1b75b5f735569309dee7665968 (patch)
tree14d7e49ca0053a0fcab3c33b5023bf3f90c5c08a /drivers/block
parent041110a439e21cd40709ead4ffbfa8034619ad77 (diff)
parentd7c1255a3a21e98bdc64df8ccf005a174d7e6289 (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Conflicts: net/ipv4/fib_frontend.c
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/amiflop.c2
-rw-r--r--drivers/block/ataflop.c2
-rw-r--r--drivers/block/cciss.c5
-rw-r--r--drivers/block/cciss_scsi.c8
-rw-r--r--drivers/block/drbd/drbd_receiver.c15
-rw-r--r--drivers/block/drbd/drbd_req.h3
-rw-r--r--drivers/block/drbd/drbd_worker.c11
-rw-r--r--drivers/block/rbd.c748
-rw-r--r--drivers/block/xen-blkfront.c57
9 files changed, 534 insertions, 317 deletions
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a1725e6488d3..7888501ad9ee 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1341,7 +1341,7 @@ static struct request *set_next_request(void)
{
struct request_queue *q;
int cnt = FD_MAX_UNITS;
- struct request *rq;
+ struct request *rq = NULL;
/* Find next queue we can dispatch from */
fdc_queue = fdc_queue + 1;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 4e4cc6c828cb..605a67e40bbf 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1399,7 +1399,7 @@ static struct request *set_next_request(void)
{
struct request_queue *q;
int old_pos = fdc_queue;
- struct request *rq;
+ struct request *rq = NULL;
do {
q = unit[fdc_queue].disk->queue;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index a67d0a611a8a..8e0f9256eb58 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -66,6 +66,7 @@ MODULE_VERSION("3.6.26");
MODULE_LICENSE("GPL");
static DEFINE_MUTEX(cciss_mutex);
+static struct proc_dir_entry *proc_cciss;
#include "cciss_cmd.h"
#include "cciss.h"
@@ -363,8 +364,6 @@ static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
#define ENG_GIG_FACTOR (ENG_GIG/512)
#define ENGAGE_SCSI "engage scsi"
-static struct proc_dir_entry *proc_cciss;
-
static void cciss_seq_show_header(struct seq_file *seq)
{
ctlr_info_t *h = seq->private;
@@ -2835,6 +2834,8 @@ static int cciss_revalidate(struct gendisk *disk)
InquiryData_struct *inq_buff = NULL;
for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
+ if (!h->drv[logvol])
+ continue;
if (memcmp(h->drv[logvol]->LunID, drv->LunID,
sizeof(drv->LunID)) == 0) {
FOUND = 1;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 575495f3c4b8..727d0225b7d0 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -62,8 +62,8 @@ static int cciss_scsi_proc_info(
int length, /* length of data in buffer */
int func); /* 0 == read, 1 == write */
-static int cciss_scsi_queue_command (struct scsi_cmnd *cmd,
- void (* done)(struct scsi_cmnd *));
+static int cciss_scsi_queue_command (struct Scsi_Host *h,
+ struct scsi_cmnd *cmd);
static int cciss_eh_device_reset_handler(struct scsi_cmnd *);
static int cciss_eh_abort_handler(struct scsi_cmnd *);
@@ -1406,7 +1406,7 @@ static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c,
static int
-cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *))
+cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
{
ctlr_info_t *h;
int rc;
@@ -1504,6 +1504,8 @@ cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd
return 0;
}
+static DEF_SCSI_QCMD(cciss_scsi_queue_command)
+
static void cciss_unregister_scsi(ctlr_info_t *h)
{
struct cciss_scsi_adapter_data_t *sa;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index d299fe9e78c8..24487d4fb202 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -36,7 +36,6 @@
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
-#include <linux/smp_lock.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
@@ -3628,17 +3627,19 @@ static void drbdd(struct drbd_conf *mdev)
}
shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header);
- rv = drbd_recv(mdev, &header->h80.payload, shs);
- if (unlikely(rv != shs)) {
- dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
- goto err_out;
- }
-
if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
goto err_out;
}
+ if (shs) {
+ rv = drbd_recv(mdev, &header->h80.payload, shs);
+ if (unlikely(rv != shs)) {
+ dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
+ goto err_out;
+ }
+ }
+
rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
if (unlikely(!rv)) {
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 181ea0364822..ab2bd09d54b4 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -339,7 +339,8 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
}
/* completion of master bio is outside of spinlock.
- * If you need it irqsave, do it your self! */
+ * If you need it irqsave, do it your self!
+ * Which means: don't use from bio endio callback. */
static inline int req_mod(struct drbd_request *req,
enum drbd_req_event what)
{
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index b0551ba7ad0c..34f224b018b3 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -26,7 +26,6 @@
#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
-#include <linux/smp_lock.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
@@ -194,8 +193,10 @@ void drbd_endio_sec(struct bio *bio, int error)
*/
void drbd_endio_pri(struct bio *bio, int error)
{
+ unsigned long flags;
struct drbd_request *req = bio->bi_private;
struct drbd_conf *mdev = req->mdev;
+ struct bio_and_error m;
enum drbd_req_event what;
int uptodate = bio_flagged(bio, BIO_UPTODATE);
@@ -221,7 +222,13 @@ void drbd_endio_pri(struct bio *bio, int error)
bio_put(req->private_bio);
req->private_bio = ERR_PTR(error);
- req_mod(req, what);
+ /* not req_mod(), we need irqsave here! */
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ __req_mod(req, what, &m);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ if (m.bio)
+ complete_master_bio(mdev, &m);
}
int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 6ec9d53806c5..008d4a00b50d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -21,80 +21,9 @@
- Instructions for use
- --------------------
+ For usage instructions, please refer to:
- 1) Map a Linux block device to an existing rbd image.
-
- Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name]
-
- $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add
-
- The snapshot name can be "-" or omitted to map the image read/write.
-
- 2) List all active blkdev<->object mappings.
-
- In this example, we have performed step #1 twice, creating two blkdevs,
- mapped to two separate rados objects in the rados rbd pool
-
- $ cat /sys/class/rbd/list
- #id major client_name pool name snap KB
- 0 254 client4143 rbd foo - 1024000
-
- The columns, in order, are:
- - blkdev unique id
- - blkdev assigned major
- - rados client id
- - rados pool name
- - rados block device name
- - mapped snapshot ("-" if none)
- - device size in KB
-
-
- 3) Create a snapshot.
-
- Usage: <blkdev id> <snapname>
-
- $ echo "0 mysnap" > /sys/class/rbd/snap_create
-
-
- 4) Listing a snapshot.
-
- $ cat /sys/class/rbd/snaps_list
- #id snap KB
- 0 - 1024000 (*)
- 0 foo 1024000
-
- The columns, in order, are:
- - blkdev unique id
- - snapshot name, '-' means none (active read/write version)
- - size of device at time of snapshot
- - the (*) indicates this is the active version
-
- 5) Rollback to snapshot.
-
- Usage: <blkdev id> <snapname>
-
- $ echo "0 mysnap" > /sys/class/rbd/snap_rollback
-
-
- 6) Mapping an image using snapshot.
-
- A snapshot mapping is read-only. This is being done by passing
- snap=<snapname> to the options when adding a device.
-
- $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add
-
-
- 7) Remove an active blkdev<->rbd image mapping.
-
- In this example, we remove the mapping with blkdev unique id 1.
-
- $ echo 1 > /sys/class/rbd/remove
-
-
- NOTE: The actual creation and deletion of rados objects is outside the scope
- of this driver.
+ Documentation/ABI/testing/sysfs-bus-rbd
*/
@@ -163,6 +92,14 @@ struct rbd_request {
u64 len;
};
+struct rbd_snap {
+ struct device dev;
+ const char *name;
+ size_t size;
+ struct list_head node;
+ u64 id;
+};
+
/*
* a single device
*/
@@ -193,21 +130,60 @@ struct rbd_device {
int read_only;
struct list_head node;
+
+ /* list of snapshots */
+ struct list_head snaps;
+
+ /* sysfs related */
+ struct device dev;
+};
+
+static struct bus_type rbd_bus_type = {
+ .name = "rbd",
};
static spinlock_t node_lock; /* protects client get/put */
-static struct class *class_rbd; /* /sys/class/rbd */
static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
static LIST_HEAD(rbd_dev_list); /* devices */
static LIST_HEAD(rbd_client_list); /* clients */
+static int __rbd_init_snaps_header(struct rbd_device *rbd_dev);
+static void rbd_dev_release(struct device *dev);
+static ssize_t rbd_snap_rollback(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t size);
+static ssize_t rbd_snap_add(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count);
+static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
+ struct rbd_snap *snap);;
+
+
+static struct rbd_device *dev_to_rbd(struct device *dev)
+{
+ return container_of(dev, struct rbd_device, dev);
+}
+
+static struct device *rbd_get_dev(struct rbd_device *rbd_dev)
+{
+ return get_device(&rbd_dev->dev);
+}
+
+static void rbd_put_dev(struct rbd_device *rbd_dev)
+{
+ put_device(&rbd_dev->dev);
+}
static int rbd_open(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
struct rbd_device *rbd_dev = disk->private_data;
+ rbd_get_dev(rbd_dev);
+
set_device_ro(bdev, rbd_dev->read_only);
if ((mode & FMODE_WRITE) && rbd_dev->read_only)
@@ -216,9 +192,19 @@ static int rbd_open(struct block_device *bdev, fmode_t mode)
return 0;
}
+static int rbd_release(struct gendisk *disk, fmode_t mode)
+{
+ struct rbd_device *rbd_dev = disk->private_data;
+
+ rbd_put_dev(rbd_dev);
+
+ return 0;
+}
+
static const struct block_device_operations rbd_bd_ops = {
.owner = THIS_MODULE,
.open = rbd_open,
+ .release = rbd_release,
};
/*
@@ -361,7 +347,6 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
int ret = -ENOMEM;
init_rwsem(&header->snap_rwsem);
-
header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
snap_count *
@@ -1256,10 +1241,20 @@ bad:
return -ERANGE;
}
+static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
+{
+ struct rbd_snap *snap;
+
+ while (!list_empty(&rbd_dev->snaps)) {
+ snap = list_first_entry(&rbd_dev->snaps, struct rbd_snap, node);
+ __rbd_remove_snap_dev(rbd_dev, snap);
+ }
+}
+
/*
* only read the first part of the ondisk header, without the snaps info
*/
-static int rbd_update_snaps(struct rbd_device *rbd_dev)
+static int __rbd_update_snaps(struct rbd_device *rbd_dev)
{
int ret;
struct rbd_image_header h;
@@ -1280,12 +1275,15 @@ static int rbd_update_snaps(struct rbd_device *rbd_dev)
rbd_dev->header.total_snaps = h.total_snaps;
rbd_dev->header.snapc = h.snapc;
rbd_dev->header.snap_names = h.snap_names;
+ rbd_dev->header.snap_names_len = h.snap_names_len;
rbd_dev->header.snap_sizes = h.snap_sizes;
rbd_dev->header.snapc->seq = snap_seq;
+ ret = __rbd_init_snaps_header(rbd_dev);
+
up_write(&rbd_dev->header.snap_rwsem);
- return 0;
+ return ret;
}
static int rbd_init_disk(struct rbd_device *rbd_dev)
@@ -1300,6 +1298,11 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
if (rc)
return rc;
+ /* no need to lock here, as rbd_dev is not registered yet */
+ rc = __rbd_init_snaps_header(rbd_dev);
+ if (rc)
+ return rc;
+
rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size);
if (rc)
return rc;
@@ -1343,54 +1346,360 @@ out:
return rc;
}
-/********************************************************************
- * /sys/class/rbd/
- * add map rados objects to blkdev
- * remove unmap rados objects
- * list show mappings
- *******************************************************************/
+/*
+ sysfs
+*/
+
+static ssize_t rbd_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+
+ return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size);
+}
+
+static ssize_t rbd_major_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
-static void class_rbd_release(struct class *cls)
+ return sprintf(buf, "%d\n", rbd_dev->major);
+}
+
+static ssize_t rbd_client_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- kfree(cls);
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+
+ return sprintf(buf, "client%lld\n", ceph_client_id(rbd_dev->client));
}
-static ssize_t class_rbd_list(struct class *c,
- struct class_attribute *attr,
- char *data)
+static ssize_t rbd_pool_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- int n = 0;
- struct list_head *tmp;
- int max = PAGE_SIZE;
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+
+ return sprintf(buf, "%s\n", rbd_dev->pool_name);
+}
+
+static ssize_t rbd_name_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+
+ return sprintf(buf, "%s\n", rbd_dev->obj);
+}
+
+static ssize_t rbd_snap_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+
+ return sprintf(buf, "%s\n", rbd_dev->snap_name);
+}
+
+static ssize_t rbd_image_refresh(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t size)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ int rc;
+ int ret = size;
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- n += snprintf(data, max,
- "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n");
+ rc = __rbd_update_snaps(rbd_dev);
+ if (rc < 0)
+ ret = rc;
- list_for_each(tmp, &rbd_dev_list) {
- struct rbd_device *rbd_dev;
+ mutex_unlock(&ctl_mutex);
+ return ret;
+}
- rbd_dev = list_entry(tmp, struct rbd_device, node);
- n += snprintf(data+n, max-n,
- "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n",
- rbd_dev->id,
- rbd_dev->major,
- ceph_client_id(rbd_dev->client),
- rbd_dev->pool_name,
- rbd_dev->obj, rbd_dev->snap_name,
- rbd_dev->header.image_size >> 10);
- if (n == max)
+static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
+static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
+static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
+static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
+static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
+static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
+static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
+static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add);
+static DEVICE_ATTR(rollback_snap, S_IWUSR, NULL, rbd_snap_rollback);
+
+static struct attribute *rbd_attrs[] = {
+ &dev_attr_size.attr,
+ &dev_attr_major.attr,
+ &dev_attr_client_id.attr,
+ &dev_attr_pool.attr,
+ &dev_attr_name.attr,
+ &dev_attr_current_snap.attr,
+ &dev_attr_refresh.attr,
+ &dev_attr_create_snap.attr,
+ &dev_attr_rollback_snap.attr,
+ NULL
+};
+
+static struct attribute_group rbd_attr_group = {
+ .attrs = rbd_attrs,
+};
+
+static const struct attribute_group *rbd_attr_groups[] = {
+ &rbd_attr_group,
+ NULL
+};
+
+static void rbd_sysfs_dev_release(struct device *dev)
+{
+}
+
+static struct device_type rbd_device_type = {
+ .name = "rbd",
+ .groups = rbd_attr_groups,
+ .release = rbd_sysfs_dev_release,
+};
+
+
+/*
+ sysfs - snapshots
+*/
+
+static ssize_t rbd_snap_size_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
+
+ return sprintf(buf, "%lld\n", (long long)snap->size);
+}
+
+static ssize_t rbd_snap_id_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
+
+ return sprintf(buf, "%lld\n", (long long)snap->id);
+}
+
+static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
+static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
+
+static struct attribute *rbd_snap_attrs[] = {
+ &dev_attr_snap_size.attr,
+ &dev_attr_snap_id.attr,
+ NULL,
+};
+
+static struct attribute_group rbd_snap_attr_group = {
+ .attrs = rbd_snap_attrs,
+};
+
+static void rbd_snap_dev_release(struct device *dev)
+{
+ struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
+ kfree(snap->name);
+ kfree(snap);
+}
+
+static const struct attribute_group *rbd_snap_attr_groups[] = {
+ &rbd_snap_attr_group,
+ NULL
+};
+
+static struct device_type rbd_snap_device_type = {
+ .groups = rbd_snap_attr_groups,
+ .release = rbd_snap_dev_release,
+};
+
+static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
+ struct rbd_snap *snap)
+{
+ list_del(&snap->node);
+ device_unregister(&snap->dev);
+}
+
+static int rbd_register_snap_dev(struct rbd_device *rbd_dev,
+ struct rbd_snap *snap,
+ struct device *parent)
+{
+ struct device *dev = &snap->dev;
+ int ret;
+
+ dev->type = &rbd_snap_device_type;
+ dev->parent = parent;
+ dev->release = rbd_snap_dev_release;
+ dev_set_name(dev, "snap_%s", snap->name);
+ ret = device_register(dev);
+
+ return ret;
+}
+
+static int __rbd_add_snap_dev(struct rbd_device *rbd_dev,
+ int i, const char *name,
+ struct rbd_snap **snapp)
+{
+ int ret;
+ struct rbd_snap *snap = kzalloc(sizeof(*snap), GFP_KERNEL);
+ if (!snap)
+ return -ENOMEM;
+ snap->name = kstrdup(name, GFP_KERNEL);
+ snap->size = rbd_dev->header.snap_sizes[i];
+ snap->id = rbd_dev->header.snapc->snaps[i];
+ if (device_is_registered(&rbd_dev->dev)) {
+ ret = rbd_register_snap_dev(rbd_dev, snap,
+ &rbd_dev->dev);
+ if (ret < 0)
+ goto err;
+ }
+ *snapp = snap;
+ return 0;
+err:
+ kfree(snap->name);
+ kfree(snap);
+ return ret;
+}
+
+/*
+ * search for the previous snap in a null delimited string list
+ */
+const char *rbd_prev_snap_name(const char *name, const char *start)
+{
+ if (name < start + 2)
+ return NULL;
+
+ name -= 2;
+ while (*name) {
+ if (name == start)
+ return start;
+ name--;
+ }
+ return name + 1;
+}
+
+/*
+ * compare the old list of snapshots that we have to what's in the header
+ * and update it accordingly. Note that the header holds the snapshots
+ * in a reverse order (from newest to oldest) and we need to go from
+ * older to new so that we don't get a duplicate snap name when
+ * doing the process (e.g., removed snapshot and recreated a new
+ * one with the same name.
+ */
+static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
+{
+ const char *name, *first_name;
+ int i = rbd_dev->header.total_snaps;
+ struct rbd_snap *snap, *old_snap = NULL;
+ int ret;
+ struct list_head *p, *n;
+
+ first_name = rbd_dev->header.snap_names;
+ name = first_name + rbd_dev->header.snap_names_len;
+
+ list_for_each_prev_safe(p, n, &rbd_dev->snaps) {
+ u64 cur_id;
+
+ old_snap = list_entry(p, struct rbd_snap, node);
+
+ if (i)
+ cur_id = rbd_dev->header.snapc->snaps[i - 1];
+
+ if (!i || old_snap->id < cur_id) {
+ /* old_snap->id was skipped, thus was removed */
+ __rbd_remove_snap_dev(rbd_dev, old_snap);
+ continue;
+ }
+ if (old_snap->id == cur_id) {
+ /* we have this snapshot already */
+ i--;
+ name = rbd_prev_snap_name(name, first_name);
+ continue;
+ }
+ for (; i > 0;
+ i--, name = rbd_prev_snap_name(name, first_name)) {
+ if (!name) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+ cur_id = rbd_dev->header.snapc->snaps[i];
+ /* snapshot removal? handle it above */
+ if (cur_id >= old_snap->id)
+ break;
+ /* a new snapshot */
+ ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
+ if (ret < 0)
+ return ret;
+
+ /* note that we add it backward so using n and not p */
+ list_add(&snap->node, n);
+ p = &snap->node;
+ }
+ }
+ /* we're done going over the old snap list, just add what's left */
+ for (; i > 0; i--) {
+ name = rbd_prev_snap_name(name, first_name);
+ if (!name) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+ ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
+ if (ret < 0)
+ return ret;
+ list_add(&snap->node, &rbd_dev->snaps);
+ }
+
+ return 0;
+}
+
+
+static void rbd_root_dev_release(struct device *dev)
+{
+}
+
+static struct device rbd_root_dev = {
+ .init_name = "rbd",
+ .release = rbd_root_dev_release,
+};
+
+static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
+{
+ int ret = -ENOMEM;
+ struct device *dev;
+ struct rbd_snap *snap;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ dev = &rbd_dev->dev;
+
+ dev->bus = &rbd_bus_type;
+ dev->type = &rbd_device_type;
+ dev->parent = &rbd_root_dev;
+ dev->release = rbd_dev_release;
+ dev_set_name(dev, "%d", rbd_dev->id);
+ ret = device_register(dev);
+ if (ret < 0)
+ goto done_free;
+
+ list_for_each_entry(snap, &rbd_dev->snaps, node) {
+ ret = rbd_register_snap_dev(rbd_dev, snap,
+ &rbd_dev->dev);
+ if (ret < 0)
break;
}
mutex_unlock(&ctl_mutex);
- return n;
+ return 0;
+done_free:
+ mutex_unlock(&ctl_mutex);
+ return ret;
}
-static ssize_t class_rbd_add(struct class *c,
- struct class_attribute *attr,
- const char *buf, size_t count)
+static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
+{
+ device_unregister(&rbd_dev->dev);
+}
+
+static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count)
{
struct ceph_osd_client *osdc;
struct rbd_device *rbd_dev;
@@ -1419,6 +1728,7 @@ static ssize_t class_rbd_add(struct class *c,
/* static rbd_device initialization */
spin_lock_init(&rbd_dev->lock);
INIT_LIST_HEAD(&rbd_dev->node);
+ INIT_LIST_HEAD(&rbd_dev->snaps);
/* generate unique id: find highest unique id, add one */
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
@@ -1478,6 +1788,9 @@ static ssize_t class_rbd_add(struct class *c,
}
rbd_dev->major = irc;
+ rc = rbd_bus_add_dev(rbd_dev);
+ if (rc)
+ goto err_out_disk;
/* set up and announce blkdev mapping */
rc = rbd_init_disk(rbd_dev);
if (rc)
@@ -1487,6 +1800,8 @@ static ssize_t class_rbd_add(struct class *c,
err_out_blkdev:
unregister_blkdev(rbd_dev->major, rbd_dev->name);
+err_out_disk:
+ rbd_free_disk(rbd_dev);
err_out_client:
rbd_put_client(rbd_dev);
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
@@ -1518,35 +1833,10 @@ static struct rbd_device *__rbd_get_dev(unsigned long id)
return NULL;
}
-static ssize_t class_rbd_remove(struct class *c,
- struct class_attribute *attr,
- const char *buf,
- size_t count)
+static void rbd_dev_release(struct device *dev)
{
- struct rbd_device *rbd_dev = NULL;
- int target_id, rc;
- unsigned long ul;
-
- rc = strict_strtoul(buf, 10, &ul);
- if (rc)
- return rc;
-
- /* convert to int; abort if we lost anything in the conversion */
- target_id = (int) ul;
- if (target_id != ul)
- return -EINVAL;
-
- /* remove object from list immediately */
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
- rbd_dev = __rbd_get_dev(target_id);
- if (rbd_dev)
- list_del_init(&rbd_dev->node);
-
- mutex_unlock(&ctl_mutex);
-
- if (!rbd_dev)
- return -ENOENT;
+ struct rbd_device *rbd_dev =
+ container_of(dev, struct rbd_device, dev);
rbd_put_client(rbd_dev);
@@ -1557,67 +1847,11 @@ static ssize_t class_rbd_remove(struct class *c,
/* release module ref */
module_put(THIS_MODULE);
-
- return count;
}
-static ssize_t class_rbd_snaps_list(struct class *c,
- struct class_attribute *attr,
- char *data)
-{
- struct rbd_device *rbd_dev = NULL;
- struct list_head *tmp;
- struct rbd_image_header *header;
- int i, n = 0, max = PAGE_SIZE;
- int ret;
-
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
- n += snprintf(data, max, "#id\tsnap\tKB\n");
-
- list_for_each(tmp, &rbd_dev_list) {
- char *names, *p;
- struct ceph_snap_context *snapc;
-
- rbd_dev = list_entry(tmp, struct rbd_device, node);
- header = &rbd_dev->header;
-
- down_read(&header->snap_rwsem);
-
- names = header->snap_names;
- snapc = header->snapc;
-
- n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
- rbd_dev->id, RBD_SNAP_HEAD_NAME,
- header->image_size >> 10,
- (!rbd_dev->cur_snap ? " (*)" : ""));
- if (n == max)
- break;
-
- p = names;
- for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
- n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
- rbd_dev->id, p, header->snap_sizes[i] >> 10,
- (rbd_dev->cur_snap &&
- (snap_index(header, i) == rbd_dev->cur_snap) ?
- " (*)" : ""));
- if (n == max)
- break;
- }
-
- up_read(&header->snap_rwsem);
- }
-
-
- ret = n;
- mutex_unlock(&ctl_mutex);
- return ret;
-}
-
-static ssize_t class_rbd_snaps_refresh(struct class *c,
- struct class_attribute *attr,
- const char *buf,
- size_t count)
+static ssize_t rbd_remove(struct bus_type *bus,
+ const char *buf,
+ size_t count)
{
struct rbd_device *rbd_dev = NULL;
int target_id, rc;
@@ -1641,95 +1875,70 @@ static ssize_t class_rbd_snaps_refresh(struct class *c,
goto done;
}
- rc = rbd_update_snaps(rbd_dev);
- if (rc < 0)
- ret = rc;
+ list_del_init(&rbd_dev->node);
+
+ __rbd_remove_all_snaps(rbd_dev);
+ rbd_bus_del_dev(rbd_dev);
done:
mutex_unlock(&ctl_mutex);
return ret;
}
-static ssize_t class_rbd_snap_create(struct class *c,
- struct class_attribute *attr,
- const char *buf,
- size_t count)
+static ssize_t rbd_snap_add(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
{
- struct rbd_device *rbd_dev = NULL;
- int target_id, ret;
- char *name;
-
- name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL);
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ int ret;
+ char *name = kmalloc(count + 1, GFP_KERNEL);
if (!name)
return -ENOMEM;
- /* parse snaps add command */
- if (sscanf(buf, "%d "
- "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
- &target_id,
- name) != 2) {
- ret = -EINVAL;
- goto done;
- }
+ snprintf(name, count, "%s", buf);
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rbd_dev = __rbd_get_dev(target_id);
- if (!rbd_dev) {
- ret = -ENOENT;
- goto done_unlock;
- }
-
ret = rbd_header_add_snap(rbd_dev,
name, GFP_KERNEL);
if (ret < 0)
goto done_unlock;
- ret = rbd_update_snaps(rbd_dev);
+ ret = __rbd_update_snaps(rbd_dev);
if (ret < 0)
goto done_unlock;
ret = count;
done_unlock:
mutex_unlock(&ctl_mutex);
-done:
kfree(name);
return ret;
}
-static ssize_t class_rbd_rollback(struct class *c,
- struct class_attribute *attr,
- const char *buf,
- size_t count)
+static ssize_t rbd_snap_rollback(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
{
- struct rbd_device *rbd_dev = NULL;
- int target_id, ret;
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ int ret;
u64 snapid;
- char snap_name[RBD_MAX_SNAP_NAME_LEN];
u64 cur_ofs;
- char *seg_name;
+ char *seg_name = NULL;
+ char *snap_name = kmalloc(count + 1, GFP_KERNEL);
+ ret = -ENOMEM;
+ if (!snap_name)
+ return ret;
/* parse snaps add command */
- if (sscanf(buf, "%d "
- "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
- &target_id,
- snap_name) != 2) {
- return -EINVAL;
- }
-
- ret = -ENOMEM;
+ snprintf(snap_name, count, "%s", buf);
seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
if (!seg_name)
- return ret;
+ goto done;
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rbd_dev = __rbd_get_dev(target_id);
- if (!rbd_dev) {
- ret = -ENOENT;
- goto done_unlock;
- }
-
ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL);
if (ret < 0)
goto done_unlock;
@@ -1750,7 +1959,7 @@ static ssize_t class_rbd_rollback(struct class *c,
seg_name, ret);
}
- ret = rbd_update_snaps(rbd_dev);
+ ret = __rbd_update_snaps(rbd_dev);
if (ret < 0)
goto done_unlock;
@@ -1758,57 +1967,42 @@ static ssize_t class_rbd_rollback(struct class *c,
done_unlock:
mutex_unlock(&ctl_mutex);
+done:
kfree(seg_name);
+ kfree(snap_name);
return ret;
}
-static struct class_attribute class_rbd_attrs[] = {
- __ATTR(add, 0200, NULL, class_rbd_add),
- __ATTR(remove, 0200, NULL, class_rbd_remove),
- __ATTR(list, 0444, class_rbd_list, NULL),
- __ATTR(snaps_refresh, 0200, NULL, class_rbd_snaps_refresh),
- __ATTR(snap_create, 0200, NULL, class_rbd_snap_create),
- __ATTR(snaps_list, 0444, class_rbd_snaps_list, NULL),
- __ATTR(snap_rollback, 0200, NULL, class_rbd_rollback),
+static struct bus_attribute rbd_bus_attrs[] = {
+ __ATTR(add, S_IWUSR, NULL, rbd_add),
+ __ATTR(remove, S_IWUSR, NULL, rbd_remove),
__ATTR_NULL
};
/*
* create control files in sysfs
- * /sys/class/rbd/...
+ * /sys/bus/rbd/...
*/
static int rbd_sysfs_init(void)
{
- int ret = -ENOMEM;
+ int ret;
- class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL);
- if (!class_rbd)
- goto out;
+ rbd_bus_type.bus_attrs = rbd_bus_attrs;
- class_rbd->name = DRV_NAME;
- class_rbd->owner = THIS_MODULE;
- class_rbd->class_release = class_rbd_release;
- class_rbd->class_attrs = class_rbd_attrs;
+ ret = bus_register(&rbd_bus_type);
+ if (ret < 0)
+ return ret;
- ret = class_register(class_rbd);
- if (ret)
- goto out_class;
- return 0;
+ ret = device_register(&rbd_root_dev);
-out_class:
- kfree(class_rbd);
- class_rbd = NULL;
- pr_err(DRV_NAME ": failed to create class rbd\n");
-out:
return ret;
}
static void rbd_sysfs_cleanup(void)
{
- if (class_rbd)
- class_destroy(class_rbd);
- class_rbd = NULL;
+ device_unregister(&rbd_root_dev);
+ bus_unregister(&rbd_bus_type);
}
int __init rbd_init(void)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 255035cfc88a..657873e4328d 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -65,14 +65,14 @@ enum blkif_state {
struct blk_shadow {
struct blkif_request req;
- unsigned long request;
+ struct request *request;
unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
static DEFINE_MUTEX(blkfront_mutex);
static const struct block_device_operations xlvbd_block_fops;
-#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
+#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
/*
* We have one of these per vbd, whether ide, scsi or 'other'. They
@@ -136,7 +136,7 @@ static void add_id_to_freelist(struct blkfront_info *info,
unsigned long id)
{
info->shadow[id].req.id = info->shadow_free;
- info->shadow[id].request = 0;
+ info->shadow[id].request = NULL;
info->shadow_free = id;
}
@@ -245,14 +245,11 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
}
/*
- * blkif_queue_request
+ * Generate a Xen blkfront IO request from a blk layer request. Reads
+ * and writes are handled as expected. Since we lack a loose flush
+ * request, we map flushes into a full ordered barrier.
*
- * request block io
- *
- * id: for guest use only.
- * operation: BLKIF_OP_{READ,WRITE,PROBE}
- * buffer: buffer to read/write into. this should be a
- * virtual address in the guest os.
+ * @req: a request struct
*/
static int blkif_queue_request(struct request *req)
{
@@ -281,7 +278,7 @@ static int blkif_queue_request(struct request *req)
/* Fill out a communications ring structure. */
ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
id = get_id_from_freelist(info);
- info->shadow[id].request = (unsigned long)req;
+ info->shadow[id].request = req;
ring_req->id = id;
ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
@@ -290,6 +287,18 @@ static int blkif_queue_request(struct request *req)
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
+ if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+ /*
+ * Ideally we could just do an unordered
+ * flush-to-disk, but all we have is a full write
+ * barrier at the moment. However, a barrier write is
+ * a superset of FUA, so we can implement it the same
+ * way. (It's also a FLUSH+FUA, since it is
+ * guaranteed ordered WRT previous writes.)
+ */
+ ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+ }
+
ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
@@ -634,7 +643,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
bret = RING_GET_RESPONSE(&info->ring, i);
id = bret->id;
- req = (struct request *)info->shadow[id].request;
+ req = info->shadow[id].request;
blkif_completion(&info->shadow[id]);
@@ -647,6 +656,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
info->gd->disk_name);
error = -EOPNOTSUPP;
+ }
+ if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+ info->shadow[id].req.nr_segments == 0)) {
+ printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n",
+ info->gd->disk_name);
+ error = -EOPNOTSUPP;
+ }
+ if (unlikely(error)) {
+ if (error == -EOPNOTSUPP)
+ error = 0;
info->feature_flush = 0;
xlvbd_flush(info);
}
@@ -899,7 +918,7 @@ static int blkif_recover(struct blkfront_info *info)
/* Stage 3: Find pending requests and requeue them. */
for (i = 0; i < BLK_RING_SIZE; i++) {
/* Not in use? */
- if (copy[i].request == 0)
+ if (!copy[i].request)
continue;
/* Grab a request slot and copy shadow state into it. */
@@ -916,9 +935,7 @@ static int blkif_recover(struct blkfront_info *info)
req->seg[j].gref,
info->xbdev->otherend_id,
pfn_to_mfn(info->shadow[req->id].frame[j]),
- rq_data_dir(
- (struct request *)
- info->shadow[req->id].request));
+ rq_data_dir(info->shadow[req->id].request));
info->shadow[req->id].req = *req;
info->ring.req_prod_pvt++;
@@ -1067,14 +1084,8 @@ static void blkfront_connect(struct blkfront_info *info)
*/
info->feature_flush = 0;
- /*
- * The driver doesn't properly handled empty flushes, so
- * lets disable barrier support for now.
- */
-#if 0
if (!err && barrier)
- info->feature_flush = REQ_FLUSH;
-#endif
+ info->feature_flush = REQ_FLUSH | REQ_FUA;
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
if (err) {