summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/ceph/auth_x.c2
-rw-r--r--fs/ceph/caps.c21
-rw-r--r--fs/ceph/crush/mapper.c41
-rw-r--r--fs/ceph/debugfs.c2
-rw-r--r--fs/ceph/inode.c19
-rw-r--r--fs/ceph/mds_client.c6
-rw-r--r--fs/ceph/messenger.c4
-rw-r--r--fs/ceph/mon_client.c3
-rw-r--r--fs/ceph/osd_client.c3
-rw-r--r--fs/ceph/osdmap.c1
-rw-r--r--fs/fs-writeback.c335
-rw-r--r--fs/splice.c9
14 files changed, 184 insertions, 265 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d..722743b152d 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
{
struct address_space *mapping = vnode->vfs_inode.i_mapping;
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_cyclic = 1,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec5..d74e6af9b53 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = wbc->bdi,
.sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = inode->i_mapping->backing_dev_info,
.sync_mode = mode,
.older_than_this = NULL,
.nr_to_write = nr_pages * 2,
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 83d4d2785ff..3fe49042d8a 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -493,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN;
}
- op = le32_to_cpu(head->op);
+ op = le16_to_cpu(head->op);
result = le32_to_cpu(head->result);
dout("handle_reply op %d result %d\n", op, result);
switch (op) {
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 619b61655ee..74144d6389f 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -244,8 +244,14 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx)
struct ceph_cap *cap = NULL;
/* temporary, until we do something about cap import/export */
- if (!ctx)
- return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (!ctx) {
+ cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (cap) {
+ caps_use_count++;
+ caps_total_count++;
+ }
+ return cap;
+ }
spin_lock(&caps_list_lock);
dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
@@ -2886,18 +2892,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_cap *cap;
struct ceph_mds_request_release *rel = *p;
+ int used, dirty;
int ret = 0;
- int used = 0;
spin_lock(&inode->i_lock);
used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
- dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
- mds, ceph_cap_string(used), ceph_cap_string(drop),
+ dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
+ inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
ceph_cap_string(unless));
- /* only drop unused caps */
- drop &= ~used;
+ /* only drop unused, clean caps */
+ drop &= ~(used | dirty);
cap = __get_cap_for_mds(ci, mds);
if (cap && __cap_is_valid(cap)) {
diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c
index 9ba54efb654..a4eec133258 100644
--- a/fs/ceph/crush/mapper.c
+++ b/fs/ceph/crush/mapper.c
@@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
- dprintk("choose %d x=%d r=%d\n", in->id, x, r);
+ dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
*/
static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
{
- if (weight[item] >= 0x1000)
+ if (weight[item] >= 0x10000)
return 0;
if (weight[item] == 0)
return 1;
@@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map,
int itemtype;
int collide, reject;
const int orig_tries = 5; /* attempts before we fall back to search */
- dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos);
+
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep);
for (rep = outpos; rep < numrep; rep++) {
/* keep trying until we get a non-out, non-colliding item */
@@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map,
BUG_ON(item >= 0 ||
(-1-item) >= map->max_buckets);
in = map->buckets[-1-item];
+ retry_bucket = 1;
continue;
}
@@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map,
}
}
- if (recurse_to_leaf &&
- item < 0 &&
- crush_choose(map, map->buckets[-1-item],
- weight,
- x, outpos+1, 0,
- out2, outpos,
- firstn, 0, NULL) <= outpos) {
- reject = 1;
- } else {
+ reject = 0;
+ if (recurse_to_leaf) {
+ if (item < 0) {
+ if (crush_choose(map,
+ map->buckets[-1-item],
+ weight,
+ x, outpos+1, 0,
+ out2, outpos,
+ firstn, 0,
+ NULL) <= outpos)
+ /* didn't get leaf */
+ reject = 1;
+ } else {
+ /* we already have a leaf! */
+ out2[outpos] = item;
+ }
+ }
+
+ if (!reject) {
/* out? */
if (itemtype == 0)
reject = is_out(map, weight,
@@ -424,12 +437,12 @@ reject:
continue;
}
- dprintk("choose got %d\n", item);
+ dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
}
- dprintk("choose returns %d\n", outpos);
+ dprintk("CHOOSE returns %d\n", outpos);
return outpos;
}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3be33fb066c..f2f5332ddbb 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp)
static int caps_show(struct seq_file *s, void *p)
{
- struct ceph_client *client = p;
+ struct ceph_client *client = s->private;
int total, avail, used, reserved, min;
ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index ab47f46ca28..8f9b9fe8ef9 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -854,8 +854,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
d_drop(dn);
realdn = d_materialise_unique(dn, in);
if (IS_ERR(realdn)) {
- pr_err("splice_dentry error %p inode %p ino %llx.%llx\n",
- dn, in, ceph_vinop(in));
+ pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
+ PTR_ERR(realdn), dn, in, ceph_vinop(in));
if (prehash)
*prehash = false; /* don't rehash on error */
dn = realdn; /* note realdn contains the error */
@@ -1234,18 +1234,23 @@ retry_lookup:
goto out;
}
dn = splice_dentry(dn, in, NULL);
+ if (IS_ERR(dn))
+ dn = NULL;
}
if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
- dput(dn);
- continue;
+ goto next_item;
}
- update_dentry_lease(dn, rinfo->dir_dlease[i],
- req->r_session, req->r_request_started);
- dput(dn);
+ if (dn)
+ update_dentry_lease(dn, rinfo->dir_dlease[i],
+ req->r_session,
+ req->r_request_started);
+next_item:
+ if (dn)
+ dput(dn);
}
req->r_did_prepopulate = true;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1766947fc07..3ab79f6c4ce 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2783,6 +2783,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
drop_leases(mdsc);
ceph_flush_dirty_caps(mdsc);
wait_requests(mdsc);
+
+ /*
+ * wait for reply handlers to drop their request refs and
+ * their inode/dcache refs
+ */
+ ceph_msgr_flush();
}
/*
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 64b8b1f7863..9ad43a310a4 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -657,7 +657,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
- con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT;
+ con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1396,10 +1396,12 @@ static int read_partial_message(struct ceph_connection *con)
if (!con->in_msg) {
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_hdr.front_len, con->in_hdr.data_len);
+ skip = 0;
con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
+ BUG_ON(con->in_msg);
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 07a539906e6..cc115eafae1 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -725,7 +725,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
dout("authenticated, starting session\n");
monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
- monc->client->msgr->inst.name.num = monc->auth->global_id;
+ monc->client->msgr->inst.name.num =
+ cpu_to_le64(monc->auth->global_id);
__send_subscribe(monc);
__resend_generic_request(monc);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index d25b4add85b..92b7251a53f 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -1344,7 +1344,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
int type = le16_to_cpu(msg->hdr.type);
if (!osd)
- return;
+ goto out;
osdc = osd->o_osdc;
switch (type) {
@@ -1359,6 +1359,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
pr_err("received unknown message type %d %s\n", type,
ceph_msg_type_name(type));
}
+out:
ceph_msg_put(msg);
}
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index ddc656fb5c0..50ce64ebd33 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -707,6 +707,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
newcrush = crush_decode(*p, min(*p+len, end));
if (IS_ERR(newcrush))
return ERR_CAST(newcrush);
+ *p += len;
}
/* new flags? */
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0609607d395..d5be1693ac9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,43 +38,18 @@ int nr_pdflush_threads;
/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
-struct wb_writeback_args {
+struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
enum writeback_sync_modes sync_mode;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
-};
-/*
- * Work items for the bdi_writeback threads
- */
-struct bdi_work {
struct list_head list; /* pending work list */
- struct rcu_head rcu_head; /* for RCU free/clear of work */
-
- unsigned long seen; /* threads that have seen this work */
- atomic_t pending; /* number of threads still to do work */
-
- struct wb_writeback_args args; /* writeback arguments */
-
- unsigned long state; /* flag bits, see WS_* */
+ struct completion *done; /* set if the caller waits */
};
-enum {
- WS_INPROGRESS = 0,
- WS_ONSTACK,
-};
-
-static inline void bdi_work_init(struct bdi_work *work,
- struct wb_writeback_args *args)
-{
- INIT_RCU_HEAD(&work->rcu_head);
- work->args = *args;
- __set_bit(WS_INPROGRESS, &work->state);
-}
-
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
@@ -87,49 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
return !list_empty(&bdi->work_list);
}
-static void bdi_work_free(struct rcu_head *head)
-{
- struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
-
- clear_bit(WS_INPROGRESS, &work->state);
- smp_mb__after_clear_bit();
- wake_up_bit(&work->state, WS_INPROGRESS);
-
- if (!test_bit(WS_ONSTACK, &work->state))
- kfree(work);
-}
-
-static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
+static void bdi_queue_work(struct backing_dev_info *bdi,
+ struct wb_writeback_work *work)
{
- /*
- * The caller has retrieved the work arguments from this work,
- * drop our reference. If this is the last ref, delete and free it
- */
- if (atomic_dec_and_test(&work->pending)) {
- struct backing_dev_info *bdi = wb->bdi;
-
- spin_lock(&bdi->wb_lock);
- list_del_rcu(&work->list);
- spin_unlock(&bdi->wb_lock);
-
- call_rcu(&work->rcu_head, bdi_work_free);
- }
-}
-
-static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
-{
- work->seen = bdi->wb_mask;
- BUG_ON(!work->seen);
- atomic_set(&work->pending, bdi->wb_cnt);
- BUG_ON(!bdi->wb_cnt);
-
- /*
- * list_add_tail_rcu() contains the necessary barriers to
- * make sure the above stores are seen before the item is
- * noticed on the list
- */
spin_lock(&bdi->wb_lock);
- list_add_tail_rcu(&work->list, &bdi->work_list);
+ list_add_tail(&work->list, &bdi->work_list);
spin_unlock(&bdi->wb_lock);
/*
@@ -146,55 +83,29 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
}
}
-/*
- * Used for on-stack allocated work items. The caller needs to wait until
- * the wb threads have acked the work before it's safe to continue.
- */
-static void bdi_wait_on_work_done(struct bdi_work *work)
+static void
+__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+ bool range_cyclic, bool for_background)
{
- wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
- TASK_UNINTERRUPTIBLE);
-}
-
-static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
- struct wb_writeback_args *args)
-{
- struct bdi_work *work;
+ struct wb_writeback_work *work;
/*
* This is WB_SYNC_NONE writeback, so if allocation fails just
* wakeup the thread for old dirty data writeback
*/
- work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (work) {
- bdi_work_init(work, args);
- bdi_queue_work(bdi, work);
- } else {
- struct bdi_writeback *wb = &bdi->wb;
-
- if (wb->task)
- wake_up_process(wb->task);
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ if (bdi->wb.task)
+ wake_up_process(bdi->wb.task);
+ return;
}
-}
-/**
- * bdi_queue_work_onstack - start and wait for writeback
- * @sb: write inodes from this super_block
- *
- * Description:
- * This function initiates writeback and waits for the operation to
- * complete. Callers must hold the sb s_umount semaphore for
- * reading, to avoid having the super disappear before we are done.
- */
-static void bdi_queue_work_onstack(struct wb_writeback_args *args)
-{
- struct bdi_work work;
-
- bdi_work_init(&work, args);
- __set_bit(WS_ONSTACK, &work.state);
+ work->sync_mode = WB_SYNC_NONE;
+ work->nr_pages = nr_pages;
+ work->range_cyclic = range_cyclic;
+ work->for_background = for_background;
- bdi_queue_work(args->sb->s_bdi, &work);
- bdi_wait_on_work_done(&work);
+ bdi_queue_work(bdi, work);
}
/**
@@ -210,13 +121,7 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args)
*/
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
{
- struct wb_writeback_args args = {
- .sync_mode = WB_SYNC_NONE,
- .nr_pages = nr_pages,
- .range_cyclic = 1,
- };
-
- bdi_alloc_queue_work(bdi, &args);
+ __bdi_start_writeback(bdi, nr_pages, true, false);
}
/**
@@ -230,13 +135,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
*/
void bdi_start_background_writeback(struct backing_dev_info *bdi)
{
- struct wb_writeback_args args = {
- .sync_mode = WB_SYNC_NONE,
- .nr_pages = LONG_MAX,
- .for_background = 1,
- .range_cyclic = 1,
- };
- bdi_alloc_queue_work(bdi, &args);
+ __bdi_start_writeback(bdi, LONG_MAX, true, true);
}
/*
@@ -554,29 +453,41 @@ static bool pin_sb_for_writeback(struct super_block *sb)
/*
* Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
* inodes. Otherwise write only ones which go sequentially
* in reverse order.
+ *
* Return 1, if the caller writeback routine should be
* interrupted. Otherwise return 0.
*/
-static int writeback_sb_inodes(struct super_block *sb,
- struct bdi_writeback *wb,
- struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
+ struct writeback_control *wbc, bool only_this_sb)
{
while (!list_empty(&wb->b_io)) {
long pages_skipped;
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
- if (wbc->sb && sb != inode->i_sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- if (sb != inode->i_sb)
- /* finish with this superblock */
+
+ if (inode->i_sb != sb) {
+ if (only_this_sb) {
+ /*
+ * We only want to write back data for this
+ * superblock, move all inodes not belonging
+ * to it back onto the dirty list.
+ */
+ redirty_tail(inode);
+ continue;
+ }
+
+ /*
+ * The inode belongs to a different superblock.
+ * Bounce back to the caller to unpin this and
+ * pin the next superblock.
+ */
return 0;
+ }
+
if (inode->i_state & (I_NEW | I_WILL_FREE)) {
requeue_io(inode);
continue;
@@ -614,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb,
return 1;
}
-static void writeback_inodes_wb(struct bdi_writeback *wb,
- struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
+ struct writeback_control *wbc)
{
int ret = 0;
@@ -629,29 +540,12 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
struct inode, i_list);
struct super_block *sb = inode->i_sb;
- if (wbc->sb) {
- /*
- * We are requested to write out inodes for a specific
- * superblock. This means we already have s_umount
- * taken by the caller which also waits for us to
- * complete the writeout.
- */
- if (sb != wbc->sb) {
- redirty_tail(inode);
- continue;
- }
-
- WARN_ON(!rwsem_is_locked(&sb->s_umount));
-
- ret = writeback_sb_inodes(sb, wb, wbc);
- } else {
- if (!pin_sb_for_writeback(sb)) {
- requeue_io(inode);
- continue;
- }
- ret = writeback_sb_inodes(sb, wb, wbc);
- drop_super(sb);
+ if (!pin_sb_for_writeback(sb)) {
+ requeue_io(inode);
+ continue;
}
+ ret = writeback_sb_inodes(sb, wb, wbc, false);
+ drop_super(sb);
if (ret)
break;
@@ -660,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
/* Leave any unwritten inodes on b_io */
}
-void writeback_inodes_wbc(struct writeback_control *wbc)
+static void __writeback_inodes_sb(struct super_block *sb,
+ struct bdi_writeback *wb, struct writeback_control *wbc)
{
- struct backing_dev_info *bdi = wbc->bdi;
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
- writeback_inodes_wb(&bdi->wb, wbc);
+ wbc->wb_start = jiffies; /* livelock avoidance */
+ spin_lock(&inode_lock);
+ if (!wbc->for_kupdate || list_empty(&wb->b_io))
+ queue_io(wb, wbc->older_than_this);
+ writeback_sb_inodes(sb, wb, wbc, true);
+ spin_unlock(&inode_lock);
}
/*
@@ -702,16 +602,14 @@ static inline bool over_bground_thresh(void)
* all dirty pages if they are all attached to "old" mappings.
*/
static long wb_writeback(struct bdi_writeback *wb,
- struct wb_writeback_args *args)
+ struct wb_writeback_work *work)
{
struct writeback_control wbc = {
- .bdi = wb->bdi,
- .sb = args->sb,
- .sync_mode = args->sync_mode,
+ .sync_mode = work->sync_mode,
.older_than_this = NULL,
- .for_kupdate = args->for_kupdate,
- .for_background = args->for_background,
- .range_cyclic = args->range_cyclic,
+ .for_kupdate = work->for_kupdate,
+ .for_background = work->for_background,
+ .range_cyclic = work->range_cyclic,
};
unsigned long oldest_jif;
long wrote = 0;
@@ -731,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
/*
* Stop writeback when nr_pages has been consumed
*/
- if (args->nr_pages <= 0)
+ if (work->nr_pages <= 0)
break;
/*
* For background writeout, stop when we are below the
* background dirty threshold
*/
- if (args->for_background && !over_bground_thresh())
+ if (work->for_background && !over_bground_thresh())
break;
wbc.more_io = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
- writeback_inodes_wb(wb, &wbc);
- args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+ if (work->sb)
+ __writeback_inodes_sb(work->sb, wb, &wbc);
+ else
+ writeback_inodes_wb(wb, &wbc);
+ work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
/*
@@ -781,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
}
/*
- * Return the next bdi_work struct that hasn't been processed by this
- * wb thread yet. ->seen is initially set for each thread that exists
- * for this device, when a thread first notices a piece of work it
- * clears its bit. Depending on writeback type, the thread will notify
- * completion on either receiving the work (WB_SYNC_NONE) or after
- * it is done (WB_SYNC_ALL).
+ * Return the next wb_writeback_work struct that hasn't been processed yet.
*/
-static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
- struct bdi_writeback *wb)
+static struct wb_writeback_work *
+get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
{
- struct bdi_work *work, *ret = NULL;
-
- rcu_read_lock();
+ struct wb_writeback_work *work = NULL;
- list_for_each_entry_rcu(work, &bdi->work_list, list) {
- if (!test_bit(wb->nr, &work->seen))
- continue;
- clear_bit(wb->nr, &work->seen);
-
- ret = work;
- break;
+ spin_lock(&bdi->wb_lock);
+ if (!list_empty(&bdi->work_list)) {
+ work = list_entry(bdi->work_list.next,
+ struct wb_writeback_work, list);
+ list_del_init(&work->list);
}
-
- rcu_read_unlock();
- return ret;
+ spin_unlock(&bdi->wb_lock);
+ return work;
}
static long wb_check_old_data_flush(struct bdi_writeback *wb)
@@ -830,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
if (nr_pages) {
- struct wb_writeback_args args = {
+ struct wb_writeback_work work = {
.nr_pages = nr_pages,
.sync_mode = WB_SYNC_NONE,
.for_kupdate = 1,
.range_cyclic = 1,
};
- return wb_writeback(wb, &args);
+ return wb_writeback(wb, &work);
}
return 0;
@@ -849,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
{
struct backing_dev_info *bdi = wb->bdi;
- struct bdi_work *work;
+ struct wb_writeback_work *work;
long wrote = 0;
while ((work = get_next_work_item(bdi, wb)) != NULL) {
- struct wb_writeback_args args = work->args;
-
/*
* Override sync mode, in case we must wait for completion
+ * because this thread is exiting now.
*/
if (force_wait)
- work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
-
- /*
- * If this isn't a data integrity operation, just notify
- * that we have seen this work and we are now starting it.
- */
- if (!test_bit(WS_ONSTACK, &work->state))
- wb_clear_pending(wb, work);
+ work->sync_mode = WB_SYNC_ALL;
- wrote += wb_writeback(wb, &args);
+ wrote += wb_writeback(wb, work);
/*
- * This is a data integrity writeback, so only do the
- * notification when we have completed the work.
+ * Notify the caller of completion if this is a synchronous
+ * work item, otherwise just free it.
*/
- if (test_bit(WS_ONSTACK, &work->state))
- wb_clear_pending(wb, work);
+ if (work->done)
+ complete(work->done);
+ else
+ kfree(work);
}
/*
@@ -938,14 +823,9 @@ int bdi_writeback_task(struct bdi_writeback *wb)
void wakeup_flusher_threads(long nr_pages)
{
struct backing_dev_info *bdi;
- struct wb_writeback_args args = {
- .sync_mode = WB_SYNC_NONE,
- };
- if (nr_pages) {
- args.nr_pages = nr_pages;
- } else {
- args.nr_pages = global_page_state(NR_FILE_DIRTY) +
+ if (!nr_pages) {
+ nr_pages = global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS);
}
@@ -953,7 +833,7 @@ void wakeup_flusher_threads(long nr_pages)
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
if (!bdi_has_dirty_io(bdi))
continue;
- bdi_alloc_queue_work(bdi, &args);
+ __bdi_start_writeback(bdi, nr_pages, false, false);
}
rcu_read_unlock();
}
@@ -1162,17 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb)
{
unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
- struct wb_writeback_args args = {
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_NONE,
+ .done = &done,
};
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- args.nr_pages = nr_dirty + nr_unstable +
+ work.nr_pages = nr_dirty + nr_unstable +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
- bdi_queue_work_onstack(&args);
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
}
EXPORT_SYMBOL(writeback_inodes_sb);
@@ -1204,16 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
*/
void sync_inodes_sb(struct super_block *sb)
{
- struct wb_writeback_args args = {
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_ALL,
.nr_pages = LONG_MAX,
.range_cyclic = 0,
+ .done = &done,
};
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- bdi_queue_work_onstack(&args);
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
+
wait_sb_inodes(sb);
}
EXPORT_SYMBOL(sync_inodes_sb);
diff --git a/fs/splice.c b/fs/splice.c
index 740e6b9faf7..efdbfece993 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
{
struct file *file = sd->u.file;
- return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
+ return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
+ sd->flags);
}
/**
@@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_in)
return -ESPIPE;
if (off_out) {
- if (!out->f_op || !out->f_op->llseek ||
- out->f_op->llseek == no_llseek)
+ if (!(out->f_mode & FMODE_PWRITE))
return -EINVAL;
if (copy_from_user(&offset, off_out, sizeof(loff_t)))
return -EFAULT;
@@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_out)
return -ESPIPE;
if (off_in) {
- if (!in->f_op || !in->f_op->llseek ||
- in->f_op->llseek == no_llseek)
+ if (!(in->f_mode & FMODE_PREAD))
return -EINVAL;
if (copy_from_user(&offset, off_in, sizeof(loff_t)))
return -EFAULT;