diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/afs/write.c | 1 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 2 | ||||
| -rw-r--r-- | fs/ceph/auth_x.c | 2 | ||||
| -rw-r--r-- | fs/ceph/caps.c | 21 | ||||
| -rw-r--r-- | fs/ceph/crush/mapper.c | 41 | ||||
| -rw-r--r-- | fs/ceph/debugfs.c | 2 | ||||
| -rw-r--r-- | fs/ceph/inode.c | 19 | ||||
| -rw-r--r-- | fs/ceph/mds_client.c | 6 | ||||
| -rw-r--r-- | fs/ceph/messenger.c | 4 | ||||
| -rw-r--r-- | fs/ceph/mon_client.c | 3 | ||||
| -rw-r--r-- | fs/ceph/osd_client.c | 3 | ||||
| -rw-r--r-- | fs/ceph/osdmap.c | 1 | ||||
| -rw-r--r-- | fs/fs-writeback.c | 335 | ||||
| -rw-r--r-- | fs/splice.c | 9 |
14 files changed, 184 insertions, 265 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c index 3dab9e9948d..722743b152d 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode) { struct address_space *mapping = vnode->vfs_inode.i_mapping; struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_cyclic = 1, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a4080c21ec5..d74e6af9b53 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { - .bdi = wbc->bdi, .sync_mode = wbc->sync_mode, .older_than_this = NULL, .nr_to_write = 64, @@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, .sync_io = mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { - .bdi = inode->i_mapping->backing_dev_info, .sync_mode = mode, .older_than_this = NULL, .nr_to_write = nr_pages * 2, diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 83d4d2785ff..3fe49042d8a 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -493,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, return -EAGAIN; } - op = le32_to_cpu(head->op); + op = le16_to_cpu(head->op); result = le32_to_cpu(head->result); dout("handle_reply op %d result %d\n", op, result); switch (op) { diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 619b61655ee..74144d6389f 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -244,8 +244,14 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) struct ceph_cap *cap = NULL; /* temporary, until we do something about cap import/export */ - if (!ctx) - return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); + if (!ctx) { + cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); + if (cap) { + caps_use_count++; + caps_total_count++; + } + return cap; + } spin_lock(&caps_list_lock); dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", @@ -2886,18 +2892,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *cap; struct ceph_mds_request_release *rel = *p; + int used, dirty; int ret = 0; - int used = 0; spin_lock(&inode->i_lock); used = __ceph_caps_used(ci); + dirty = __ceph_caps_dirty(ci); - dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode, - mds, ceph_cap_string(used), ceph_cap_string(drop), + dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n", + inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop), ceph_cap_string(unless)); - /* only drop unused caps */ - drop &= ~used; + /* only drop unused, clean caps */ + drop &= ~(used | dirty); cap = __get_cap_for_mds(ci, mds); if (cap && __cap_is_valid(cap)) { diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c index 9ba54efb654..a4eec133258 100644 --- a/fs/ceph/crush/mapper.c +++ b/fs/ceph/crush/mapper.c @@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket, static int crush_bucket_choose(struct crush_bucket *in, int x, int r) { - dprintk("choose %d x=%d r=%d\n", in->id, x, r); + dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); switch (in->alg) { case CRUSH_BUCKET_UNIFORM: return bucket_uniform_choose((struct crush_bucket_uniform *)in, @@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r) */ static int is_out(struct crush_map *map, __u32 *weight, int item, int x) { - if (weight[item] >= 0x1000) + if (weight[item] >= 0x10000) return 0; if (weight[item] == 0) return 1; @@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map, int itemtype; int collide, reject; const int orig_tries = 5; /* attempts before we fall back to search */ - dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos); + + dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", + bucket->id, x, outpos, numrep); for (rep = outpos; rep < numrep; rep++) { /* keep trying until we get a non-out, non-colliding item */ @@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map, BUG_ON(item >= 0 || (-1-item) >= map->max_buckets); in = map->buckets[-1-item]; + retry_bucket = 1; continue; } @@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map, } } - if (recurse_to_leaf && - item < 0 && - crush_choose(map, map->buckets[-1-item], - weight, - x, outpos+1, 0, - out2, outpos, - firstn, 0, NULL) <= outpos) { - reject = 1; - } else { + reject = 0; + if (recurse_to_leaf) { + if (item < 0) { + if (crush_choose(map, + map->buckets[-1-item], + weight, + x, outpos+1, 0, + out2, outpos, + firstn, 0, + NULL) <= outpos) + /* didn't get leaf */ + reject = 1; + } else { + /* we already have a leaf! */ + out2[outpos] = item; + } + } + + if (!reject) { /* out? */ if (itemtype == 0) reject = is_out(map, weight, @@ -424,12 +437,12 @@ reject: continue; } - dprintk("choose got %d\n", item); + dprintk("CHOOSE got %d\n", item); out[outpos] = item; outpos++; } - dprintk("choose returns %d\n", outpos); + dprintk("CHOOSE returns %d\n", outpos); return outpos; } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3be33fb066c..f2f5332ddbb 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp) static int caps_show(struct seq_file *s, void *p) { - struct ceph_client *client = p; + struct ceph_client *client = s->private; int total, avail, used, reserved, min; ceph_reservation_status(client, &total, &avail, &used, &reserved, &min); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ab47f46ca28..8f9b9fe8ef9 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -854,8 +854,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, d_drop(dn); realdn = d_materialise_unique(dn, in); if (IS_ERR(realdn)) { - pr_err("splice_dentry error %p inode %p ino %llx.%llx\n", - dn, in, ceph_vinop(in)); + pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", + PTR_ERR(realdn), dn, in, ceph_vinop(in)); if (prehash) *prehash = false; /* don't rehash on error */ dn = realdn; /* note realdn contains the error */ @@ -1234,18 +1234,23 @@ retry_lookup: goto out; } dn = splice_dentry(dn, in, NULL); + if (IS_ERR(dn)) + dn = NULL; } if (fill_inode(in, &rinfo->dir_in[i], NULL, session, req->r_request_started, -1, &req->r_caps_reservation) < 0) { pr_err("fill_inode badness on %p\n", in); - dput(dn); - continue; + goto next_item; } - update_dentry_lease(dn, rinfo->dir_dlease[i], - req->r_session, req->r_request_started); - dput(dn); + if (dn) + update_dentry_lease(dn, rinfo->dir_dlease[i], + req->r_session, + req->r_request_started); +next_item: + if (dn) + dput(dn); } req->r_did_prepopulate = true; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1766947fc07..3ab79f6c4ce 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2783,6 +2783,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) drop_leases(mdsc); ceph_flush_dirty_caps(mdsc); wait_requests(mdsc); + + /* + * wait for reply handlers to drop their request refs and + * their inode/dcache refs + */ + ceph_msgr_flush(); } /* diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 64b8b1f7863..9ad43a310a4 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -657,7 +657,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, con->connect_seq, global_seq, proto); - con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT; + con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT); con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); con->out_connect.global_seq = cpu_to_le32(global_seq); @@ -1396,10 +1396,12 @@ static int read_partial_message(struct ceph_connection *con) if (!con->in_msg) { dout("got hdr type %d front %d data %d\n", con->in_hdr.type, con->in_hdr.front_len, con->in_hdr.data_len); + skip = 0; con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); + BUG_ON(con->in_msg); con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 07a539906e6..cc115eafae1 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -725,7 +725,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc, dout("authenticated, starting session\n"); monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; - monc->client->msgr->inst.name.num = monc->auth->global_id; + monc->client->msgr->inst.name.num = + cpu_to_le64(monc->auth->global_id); __send_subscribe(monc); __resend_generic_request(monc); diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index d25b4add85b..92b7251a53f 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -1344,7 +1344,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) int type = le16_to_cpu(msg->hdr.type); if (!osd) - return; + goto out; osdc = osd->o_osdc; switch (type) { @@ -1359,6 +1359,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) pr_err("received unknown message type %d %s\n", type, ceph_msg_type_name(type)); } +out: ceph_msg_put(msg); } diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index ddc656fb5c0..50ce64ebd33 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -707,6 +707,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, newcrush = crush_decode(*p, min(*p+len, end)); if (IS_ERR(newcrush)) return ERR_CAST(newcrush); + *p += len; } /* new flags? */ diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 0609607d395..d5be1693ac9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -38,43 +38,18 @@ int nr_pdflush_threads; /* * Passed into wb_writeback(), essentially a subset of writeback_control */ -struct wb_writeback_args { +struct wb_writeback_work { long nr_pages; struct super_block *sb; enum writeback_sync_modes sync_mode; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; -}; -/* - * Work items for the bdi_writeback threads - */ -struct bdi_work { struct list_head list; /* pending work list */ - struct rcu_head rcu_head; /* for RCU free/clear of work */ - - unsigned long seen; /* threads that have seen this work */ - atomic_t pending; /* number of threads still to do work */ - - struct wb_writeback_args args; /* writeback arguments */ - - unsigned long state; /* flag bits, see WS_* */ + struct completion *done; /* set if the caller waits */ }; -enum { - WS_INPROGRESS = 0, - WS_ONSTACK, -}; - -static inline void bdi_work_init(struct bdi_work *work, - struct wb_writeback_args *args) -{ - INIT_RCU_HEAD(&work->rcu_head); - work->args = *args; - __set_bit(WS_INPROGRESS, &work->state); -} - /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure. @@ -87,49 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi) return !list_empty(&bdi->work_list); } -static void bdi_work_free(struct rcu_head *head) -{ - struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); - - clear_bit(WS_INPROGRESS, &work->state); - smp_mb__after_clear_bit(); - wake_up_bit(&work->state, WS_INPROGRESS); - - if (!test_bit(WS_ONSTACK, &work->state)) - kfree(work); -} - -static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) +static void bdi_queue_work(struct backing_dev_info *bdi, + struct wb_writeback_work *work) { - /* - * The caller has retrieved the work arguments from this work, - * drop our reference. If this is the last ref, delete and free it - */ - if (atomic_dec_and_test(&work->pending)) { - struct backing_dev_info *bdi = wb->bdi; - - spin_lock(&bdi->wb_lock); - list_del_rcu(&work->list); - spin_unlock(&bdi->wb_lock); - - call_rcu(&work->rcu_head, bdi_work_free); - } -} - -static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) -{ - work->seen = bdi->wb_mask; - BUG_ON(!work->seen); - atomic_set(&work->pending, bdi->wb_cnt); - BUG_ON(!bdi->wb_cnt); - - /* - * list_add_tail_rcu() contains the necessary barriers to - * make sure the above stores are seen before the item is - * noticed on the list - */ spin_lock(&bdi->wb_lock); - list_add_tail_rcu(&work->list, &bdi->work_list); + list_add_tail(&work->list, &bdi->work_list); spin_unlock(&bdi->wb_lock); /* @@ -146,55 +83,29 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) } } -/* - * Used for on-stack allocated work items. The caller needs to wait until - * the wb threads have acked the work before it's safe to continue. - */ -static void bdi_wait_on_work_done(struct bdi_work *work) +static void +__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, + bool range_cyclic, bool for_background) { - wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait, - TASK_UNINTERRUPTIBLE); -} - -static void bdi_alloc_queue_work(struct backing_dev_info *bdi, - struct wb_writeback_args *args) -{ - struct bdi_work *work; + struct wb_writeback_work *work; /* * This is WB_SYNC_NONE writeback, so if allocation fails just * wakeup the thread for old dirty data writeback */ - work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (work) { - bdi_work_init(work, args); - bdi_queue_work(bdi, work); - } else { - struct bdi_writeback *wb = &bdi->wb; - - if (wb->task) - wake_up_process(wb->task); + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + if (bdi->wb.task) + wake_up_process(bdi->wb.task); + return; } -} -/** - * bdi_queue_work_onstack - start and wait for writeback - * @sb: write inodes from this super_block - * - * Description: - * This function initiates writeback and waits for the operation to - * complete. Callers must hold the sb s_umount semaphore for - * reading, to avoid having the super disappear before we are done. - */ -static void bdi_queue_work_onstack(struct wb_writeback_args *args) -{ - struct bdi_work work; - - bdi_work_init(&work, args); - __set_bit(WS_ONSTACK, &work.state); + work->sync_mode = WB_SYNC_NONE; + work->nr_pages = nr_pages; + work->range_cyclic = range_cyclic; + work->for_background = for_background; - bdi_queue_work(args->sb->s_bdi, &work); - bdi_wait_on_work_done(&work); + bdi_queue_work(bdi, work); } /** @@ -210,13 +121,7 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args) */ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) { - struct wb_writeback_args args = { - .sync_mode = WB_SYNC_NONE, - .nr_pages = nr_pages, - .range_cyclic = 1, - }; - - bdi_alloc_queue_work(bdi, &args); + __bdi_start_writeback(bdi, nr_pages, true, false); } /** @@ -230,13 +135,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) */ void bdi_start_background_writeback(struct backing_dev_info *bdi) { - struct wb_writeback_args args = { - .sync_mode = WB_SYNC_NONE, - .nr_pages = LONG_MAX, - .for_background = 1, - .range_cyclic = 1, - }; - bdi_alloc_queue_work(bdi, &args); + __bdi_start_writeback(bdi, LONG_MAX, true, true); } /* @@ -554,29 +453,41 @@ static bool pin_sb_for_writeback(struct super_block *sb) /* * Write a portion of b_io inodes which belong to @sb. - * If @wbc->sb != NULL, then find and write all such + * + * If @only_this_sb is true, then find and write all such * inodes. Otherwise write only ones which go sequentially * in reverse order. + * * Return 1, if the caller writeback routine should be * interrupted. Otherwise return 0. */ -static int writeback_sb_inodes(struct super_block *sb, - struct bdi_writeback *wb, - struct writeback_control *wbc) +static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, + struct writeback_control *wbc, bool only_this_sb) { while (!list_empty(&wb->b_io)) { long pages_skipped; struct inode *inode = list_entry(wb->b_io.prev, struct inode, i_list); - if (wbc->sb && sb != inode->i_sb) { - /* super block given and doesn't - match, skip this inode */ - redirty_tail(inode); - continue; - } - if (sb != inode->i_sb) - /* finish with this superblock */ + + if (inode->i_sb != sb) { + if (only_this_sb) { + /* + * We only want to write back data for this + * superblock, move all inodes not belonging + * to it back onto the dirty list. + */ + redirty_tail(inode); + continue; + } + + /* + * The inode belongs to a different superblock. + * Bounce back to the caller to unpin this and + * pin the next superblock. + */ return 0; + } + if (inode->i_state & (I_NEW | I_WILL_FREE)) { requeue_io(inode); continue; @@ -614,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb, return 1; } -static void writeback_inodes_wb(struct bdi_writeback *wb, - struct writeback_control *wbc) +void writeback_inodes_wb(struct bdi_writeback *wb, + struct writeback_control *wbc) { int ret = 0; @@ -629,29 +540,12 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, struct inode, i_list); struct super_block *sb = inode->i_sb; - if (wbc->sb) { - /* - * We are requested to write out inodes for a specific - * superblock. This means we already have s_umount - * taken by the caller which also waits for us to - * complete the writeout. - */ - if (sb != wbc->sb) { - redirty_tail(inode); - continue; - } - - WARN_ON(!rwsem_is_locked(&sb->s_umount)); - - ret = writeback_sb_inodes(sb, wb, wbc); - } else { - if (!pin_sb_for_writeback(sb)) { - requeue_io(inode); - continue; - } - ret = writeback_sb_inodes(sb, wb, wbc); - drop_super(sb); + if (!pin_sb_for_writeback(sb)) { + requeue_io(inode); + continue; } + ret = writeback_sb_inodes(sb, wb, wbc, false); + drop_super(sb); if (ret) break; @@ -660,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, /* Leave any unwritten inodes on b_io */ } -void writeback_inodes_wbc(struct writeback_control *wbc) +static void __writeback_inodes_sb(struct super_block *sb, + struct bdi_writeback *wb, struct writeback_control *wbc) { - struct backing_dev_info *bdi = wbc->bdi; + WARN_ON(!rwsem_is_locked(&sb->s_umount)); - writeback_inodes_wb(&bdi->wb, wbc); + wbc->wb_start = jiffies; /* livelock avoidance */ + spin_lock(&inode_lock); + if (!wbc->for_kupdate || list_empty(&wb->b_io)) + queue_io(wb, wbc->older_than_this); + writeback_sb_inodes(sb, wb, wbc, true); + spin_unlock(&inode_lock); } /* @@ -702,16 +602,14 @@ static inline bool over_bground_thresh(void) * all dirty pages if they are all attached to "old" mappings. */ static long wb_writeback(struct bdi_writeback *wb, - struct wb_writeback_args *args) + struct wb_writeback_work *work) { struct writeback_control wbc = { - .bdi = wb->bdi, - .sb = args->sb, - .sync_mode = args->sync_mode, + .sync_mode = work->sync_mode, .older_than_this = NULL, - .for_kupdate = args->for_kupdate, - .for_background = args->for_background, - .range_cyclic = args->range_cyclic, + .for_kupdate = work->for_kupdate, + .for_background = work->for_background, + .range_cyclic = work->range_cyclic, }; unsigned long oldest_jif; long wrote = 0; @@ -731,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb, /* * Stop writeback when nr_pages has been consumed */ - if (args->nr_pages <= 0) + if (work->nr_pages <= 0) break; /* * For background writeout, stop when we are below the * background dirty threshold */ - if (args->for_background && !over_bground_thresh()) + if (work->for_background && !over_bground_thresh()) break; wbc.more_io = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; wbc.pages_skipped = 0; - writeback_inodes_wb(wb, &wbc); - args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; + if (work->sb) + __writeback_inodes_sb(work->sb, wb, &wbc); + else + writeback_inodes_wb(wb, &wbc); + work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; /* @@ -781,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb, } /* - * Return the next bdi_work struct that hasn't been processed by this - * wb thread yet. ->seen is initially set for each thread that exists - * for this device, when a thread first notices a piece of work it - * clears its bit. Depending on writeback type, the thread will notify - * completion on either receiving the work (WB_SYNC_NONE) or after - * it is done (WB_SYNC_ALL). + * Return the next wb_writeback_work struct that hasn't been processed yet. */ -static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, - struct bdi_writeback *wb) +static struct wb_writeback_work * +get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) { - struct bdi_work *work, *ret = NULL; - - rcu_read_lock(); + struct wb_writeback_work *work = NULL; - list_for_each_entry_rcu(work, &bdi->work_list, list) { - if (!test_bit(wb->nr, &work->seen)) - continue; - clear_bit(wb->nr, &work->seen); - - ret = work; - break; + spin_lock(&bdi->wb_lock); + if (!list_empty(&bdi->work_list)) { + work = list_entry(bdi->work_list.next, + struct wb_writeback_work, list); + list_del_init(&work->list); } - - rcu_read_unlock(); - return ret; + spin_unlock(&bdi->wb_lock); + return work; } static long wb_check_old_data_flush(struct bdi_writeback *wb) @@ -830,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) (inodes_stat.nr_inodes - inodes_stat.nr_unused); if (nr_pages) { - struct wb_writeback_args args = { + struct wb_writeback_work work = { .nr_pages = nr_pages, .sync_mode = WB_SYNC_NONE, .for_kupdate = 1, .range_cyclic = 1, }; - return wb_writeback(wb, &args); + return wb_writeback(wb, &work); } return 0; @@ -849,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) long wb_do_writeback(struct bdi_writeback *wb, int force_wait) { struct backing_dev_info *bdi = wb->bdi; - struct bdi_work *work; + struct wb_writeback_work *work; long wrote = 0; while ((work = get_next_work_item(bdi, wb)) != NULL) { - struct wb_writeback_args args = work->args; - /* * Override sync mode, in case we must wait for completion + * because this thread is exiting now. */ if (force_wait) - work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; - - /* - * If this isn't a data integrity operation, just notify - * that we have seen this work and we are now starting it. - */ - if (!test_bit(WS_ONSTACK, &work->state)) - wb_clear_pending(wb, work); + work->sync_mode = WB_SYNC_ALL; - wrote += wb_writeback(wb, &args); + wrote += wb_writeback(wb, work); /* - * This is a data integrity writeback, so only do the - * notification when we have completed the work. + * Notify the caller of completion if this is a synchronous + * work item, otherwise just free it. */ - if (test_bit(WS_ONSTACK, &work->state)) - wb_clear_pending(wb, work); + if (work->done) + complete(work->done); + else + kfree(work); } /* @@ -938,14 +823,9 @@ int bdi_writeback_task(struct bdi_writeback *wb) void wakeup_flusher_threads(long nr_pages) { struct backing_dev_info *bdi; - struct wb_writeback_args args = { - .sync_mode = WB_SYNC_NONE, - }; - if (nr_pages) { - args.nr_pages = nr_pages; - } else { - args.nr_pages = global_page_state(NR_FILE_DIRTY) + + if (!nr_pages) { + nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); } @@ -953,7 +833,7 @@ void wakeup_flusher_threads(long nr_pages) list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { if (!bdi_has_dirty_io(bdi)) continue; - bdi_alloc_queue_work(bdi, &args); + __bdi_start_writeback(bdi, nr_pages, false, false); } rcu_read_unlock(); } @@ -1162,17 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb) { unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - struct wb_writeback_args args = { + DECLARE_COMPLETION_ONSTACK(done); + struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_NONE, + .done = &done, }; WARN_ON(!rwsem_is_locked(&sb->s_umount)); - args.nr_pages = nr_dirty + nr_unstable + + work.nr_pages = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused); - bdi_queue_work_onstack(&args); + bdi_queue_work(sb->s_bdi, &work); + wait_for_completion(&done); } EXPORT_SYMBOL(writeback_inodes_sb); @@ -1204,16 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle); */ void sync_inodes_sb(struct super_block *sb) { - struct wb_writeback_args args = { + DECLARE_COMPLETION_ONSTACK(done); + struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, .range_cyclic = 0, + .done = &done, }; WARN_ON(!rwsem_is_locked(&sb->s_umount)); - bdi_queue_work_onstack(&args); + bdi_queue_work(sb->s_bdi, &work); + wait_for_completion(&done); + wait_sb_inodes(sb); } EXPORT_SYMBOL(sync_inodes_sb); diff --git a/fs/splice.c b/fs/splice.c index 740e6b9faf7..efdbfece993 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, { struct file *file = sd->u.file; - return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); + return do_splice_from(pipe, file, &file->f_pos, sd->total_len, + sd->flags); } /** @@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, if (off_in) return -ESPIPE; if (off_out) { - if (!out->f_op || !out->f_op->llseek || - out->f_op->llseek == no_llseek) + if (!(out->f_mode & FMODE_PWRITE)) return -EINVAL; if (copy_from_user(&offset, off_out, sizeof(loff_t))) return -EFAULT; @@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, if (off_out) return -ESPIPE; if (off_in) { - if (!in->f_op || !in->f_op->llseek || - in->f_op->llseek == no_llseek) + if (!(in->f_mode & FMODE_PREAD)) return -EINVAL; if (copy_from_user(&offset, off_in, sizeof(loff_t))) return -EFAULT; |
