From 4524451ef7e88c64a868a8f5a0b49bda73beb2a3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 18 Aug 2010 05:29:16 -0400
Subject: jbd: replace barriers with explicit flush / FUA usage

Switch to the WRITE_FLUSH_FUA flag for journal commits and remove the
EOPNOTSUPP detection for barriers.

Signed-off-by: Christoph Hellwig
Acked-by: Jan Kara
Signed-off-by: Tejun Heo
Signed-off-by: Jens Axboe
---
 fs/jbd/commit.c | 30 +++---------------------------
 1 file changed, 3 insertions(+), 27 deletions(-)

(limited to 'fs/jbd/commit.c')

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 95d8c11c929..484c5e5fa8a 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -137,34 +137,10 @@ static int journal_write_commit_record(journal_t *journal,
 	JBUFFER_TRACE(descriptor, "write commit block");
 	set_buffer_dirty(bh);
 
-	if (journal->j_flags & JFS_BARRIER) {
-		ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER);
-
-		/*
-		 * Is it possible for another commit to fail at roughly
-		 * the same time as this one? If so, we don't want to
-		 * trust the barrier flag in the super, but instead want
-		 * to remember if we sent a barrier request
-		 */
-		if (ret == -EOPNOTSUPP) {
-			char b[BDEVNAME_SIZE];
-
-			printk(KERN_WARNING
-				"JBD: barrier-based sync failed on %s - "
-				"disabling barriers\n",
-				bdevname(journal->j_dev, b));
-			spin_lock(&journal->j_state_lock);
-			journal->j_flags &= ~JFS_BARRIER;
-			spin_unlock(&journal->j_state_lock);
-
-			/* And try again, without the barrier */
-			set_buffer_uptodate(bh);
-			set_buffer_dirty(bh);
-			ret = sync_dirty_buffer(bh);
-		}
-	} else {
+	if (journal->j_flags & JFS_BARRIER)
+		ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA);
+	else
 		ret = sync_dirty_buffer(bh);
-	}
 	put_bh(bh);		/* One for getblk() */
 	journal_put_journal_head(descriptor);
-- cgit v1.2.3
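Background for the patch above, as a sketch rather than part of the commit: the removed fallback existed because a WRITE_BARRIER request could fail with -EOPNOTSUPP on devices without barrier support. A flush/FUA request does not fail that way, because the block layer emulates REQ_FLUSH and REQ_FUA with explicit cache flushes when the hardware lacks native support, so the commit-record write reduces to roughly the following. The helper name here is made up for illustration; __sync_dirty_buffer(), sync_dirty_buffer() and the flag names are the interfaces the diff actually uses.

#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/jbd.h>

/*
 * Illustration only: the commit block write after the change.
 * WRITE_FLUSH_FUA asks the block layer to flush the device cache before
 * the write and to force the commit block itself to stable storage (FUA);
 * either step is emulated with a flush if the device cannot do it, so no
 * -EOPNOTSUPP handling is needed any more.
 */
static int jbd_write_commit_block(journal_t *journal, struct buffer_head *bh)
{
	if (journal->j_flags & JFS_BARRIER)
		/* data-integrity write: flush cache, then FUA write */
		return __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA);
	/* barriers disabled (e.g. by mount option): plain synchronous write */
	return sync_dirty_buffer(bh);
}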
From 749ef9f8423054e326f3a246327ed2db4b6d395f Mon Sep 17 00:00:00 2001
From: Corrado Zoccolo
Date: Mon, 20 Sep 2010 15:24:50 +0200
Subject: cfq: improve fsync performance for small files

Fsync performance for small files achieved by cfq on high-end disks is
lower than what deadline can achieve, due to idling introduced between
the sync write happening in process context and the journal commit.

Moreover, when competing with a sequential reader, a process writing
small files and fsync-ing them is starved.

This patch fixes the two problems by:
- marking journal commits as WRITE_SYNC, so that they get the REQ_NOIDLE
  flag set,
- force all queues that have REQ_NOIDLE requests to be put in the noidle
  tree.

Having the queue associated to the fsync-ing process and the one associated
to journal commits in the noidle tree allows:
- switching between them without idling,
- fairness vs. competing idling queues, since they will be serviced only
  after the noidle tree expires its slice.

Acked-by: Vivek Goyal
Reviewed-by: Jeff Moyer
Tested-by: Jeff Moyer
Signed-off-by: Corrado Zoccolo
Signed-off-by: Jens Axboe
---
 block/cfq-iosched.c | 18 ++++--------------
 fs/jbd/commit.c     |  2 +-
 fs/jbd2/commit.c    |  2 +-
 3 files changed, 6 insertions(+), 16 deletions(-)

(limited to 'fs/jbd/commit.c')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b9f86190763..68459262173 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -216,7 +216,6 @@ struct cfq_data {
 	enum wl_type_t serving_type;
 	unsigned long workload_expires;
 	struct cfq_group *serving_group;
-	bool noidle_tree_requires_idle;
 
 	/*
 	 * Each priority tree is sorted by next_request position. These
@@ -2126,7 +2125,6 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	slice = max_t(unsigned, slice, CFQ_MIN_TT);
 	cfq_log(cfqd, "workload slice:%d", slice);
 	cfqd->workload_expires = jiffies + slice;
-	cfqd->noidle_tree_requires_idle = false;
 }
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
@@ -3108,7 +3106,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	if (cfqq->queued[0] + cfqq->queued[1] >= 4)
 		cfq_mark_cfqq_deep(cfqq);
 
-	if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
+	if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
+		enable_idle = 0;
+	else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
 	    (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
 		enable_idle = 0;
 	else if (sample_valid(cic->ttime_samples)) {
@@ -3421,17 +3421,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 			cfq_slice_expired(cfqd, 1);
 		else if (sync && cfqq_empty &&
 			 !cfq_close_cooperator(cfqd, cfqq)) {
-			cfqd->noidle_tree_requires_idle |=
-				!(rq->cmd_flags & REQ_NOIDLE);
-			/*
-			 * Idling is enabled for SYNC_WORKLOAD.
-			 * SYNC_NOIDLE_WORKLOAD idles at the end of the tree
-			 * only if we processed at least one !REQ_NOIDLE request
-			 */
-			if (cfqd->serving_type == SYNC_WORKLOAD
-			    || cfqd->noidle_tree_requires_idle
-			    || cfqq->cfqg->nr_cfqq == 1)
-				cfq_arm_slice_timer(cfqd);
+			cfq_arm_slice_timer(cfqd);
 		}
 	}
 
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 95d8c11c929..3f030e9efea 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -318,7 +318,7 @@ void journal_commit_transaction(journal_t *journal)
 	int first_tag = 0;
 	int tag_flag;
 	int i;
-	int write_op = WRITE;
+	int write_op = WRITE_SYNC;
 
 	/*
 	 * First job: lock down the current transaction and wait for
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7c068c189d8..80910f51d4b 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -360,7 +360,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	int tag_bytes = journal_tag_bytes(journal);
 	struct buffer_head *cbh = NULL; /* For transactional checksums */
 	__u32 crc32_sum = ~0;
-	int write_op = WRITE;
+	int write_op = WRITE_SYNC;
 
 	/*
 	 * First job: lock down the current transaction and wait for
-- cgit v1.2.3
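The second half of the fix leans on how CFQ already classifies queues into per-workload service trees: for a sync queue, only the idle-window flag decides between the sync tree and the sync-noidle tree. Roughly (paraphrased from memory of block/cfq-iosched.c of this period; this sketch is not part of the patch), once the cfq_update_idle_window() hunk above disables the idle window for any queue issuing REQ_NOIDLE requests, the classification below puts both the fsync-ing process and the journal commits on the no-idle tree, where CFQ switches between queues without per-queue idling:

/*
 * Paraphrased sketch of CFQ's existing workload classification (not part
 * of the patch). With the change above, a queue that issues REQ_NOIDLE
 * requests loses its idle window and therefore lands in the no-idle tree.
 */
static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
{
	if (!cfq_cfqq_sync(cfqq))
		return ASYNC_WORKLOAD;		/* buffered/async writes */
	if (!cfq_cfqq_idle_window(cfqq))
		return SYNC_NOIDLE_WORKLOAD;	/* fsync and journal-commit queues */
	return SYNC_WORKLOAD;			/* e.g. sequential sync readers */
}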
From a910eefa511f9d1118effc13fba6773163502c4f Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Fri, 8 Oct 2010 20:05:06 +0900
Subject: jbd: Convert bitops to buffer fns

Convert set/clear_bit(BH_JWrite, ...) to set/clear_buffer_jwrite() for
consistency.

Signed-off-by: Namhyung Kim
Signed-off-by: Jan Kara
---
 fs/jbd/commit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/jbd/commit.c')

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 95d8c11c929..c8428323167 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -617,7 +617,7 @@ void journal_commit_transaction(journal_t *journal)
 			   (this will requeue both the metadata buffer and the
 			   temporary IO buffer). new_bh goes on BJ_IO*/
 
-			set_bit(BH_JWrite, &jh2bh(jh)->b_state);
+			set_buffer_jwrite(jh2bh(jh));
 			/*
 			 * akpm: journal_write_metadata_buffer() sets
 			 * new_bh->b_transaction to commit_transaction.
@@ -627,7 +627,7 @@ void journal_commit_transaction(journal_t *journal)
 			JBUFFER_TRACE(jh, "ph3: write metadata");
 			flags = journal_write_metadata_buffer(commit_transaction,
 							      jh, &new_jh, blocknr);
-			set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
+			set_buffer_jwrite(jh2bh(new_jh));
 			wbuf[bufs++] = jh2bh(new_jh);
 
 			/* Record the new block's tag in the current descriptor
@@ -737,7 +737,7 @@ wait_for_iobuf:
 		   shadowed buffer */
 		jh = commit_transaction->t_shadow_list->b_tprev;
 		bh = jh2bh(jh);
-		clear_bit(BH_JWrite, &bh->b_state);
+		clear_buffer_jwrite(bh);
 		J_ASSERT_BH(bh, buffer_jbddirty(bh));
 
 		/* The metadata is now released for reuse, but we need
-- cgit v1.2.3

From e4d5e3a497e159be7c2dbe4c61cfb185d60cfde2 Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Sat, 16 Oct 2010 17:11:02 +0900
Subject: jbd: Convert atomic_inc() to get_bh()

Convert atomic_inc(&bh->b_count) to get_bh(bh) for consistency.

Signed-off-by: Namhyung Kim
Signed-off-by: Jan Kara
---
 fs/jbd/checkpoint.c | 4 ++--
 fs/jbd/commit.c     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/jbd/commit.c')

diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 05a38b9c4c0..e4b87bc1fa5 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -221,7 +221,7 @@ restart:
 			goto restart;
 		}
 		if (buffer_locked(bh)) {
-			atomic_inc(&bh->b_count);
+			get_bh(bh);
 			spin_unlock(&journal->j_list_lock);
 			jbd_unlock_bh_state(bh);
 			wait_on_buffer(bh);
@@ -283,7 +283,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 	int ret = 0;
 
 	if (buffer_locked(bh)) {
-		atomic_inc(&bh->b_count);
+		get_bh(bh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
 		wait_on_buffer(bh);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index c8428323167..a89c4630b1e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -611,7 +611,7 @@ void journal_commit_transaction(journal_t *journal)
 			/* Bump b_count to prevent truncate from stumbling over
 			   the shadowed buffer! @@@ This can go if we ever get
 			   rid of the BJ_IO/BJ_Shadow pairing of buffers. */
-			atomic_inc(&jh2bh(jh)->b_count);
+			get_bh(jh2bh(jh));
 
 			/* Make a temporary IO buffer with which to write it out
 			   (this will requeue both the metadata buffer and the
-- cgit v1.2.3
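Both of Namhyung Kim's cleanups above swap open-coded bit and refcount manipulation for the equivalent buffer_head helpers, so the generated code is unchanged. For reference, a paraphrased sketch of what those helpers amount to in this era's headers (include/linux/buffer_head.h and include/linux/jbd.h); this is background for the diffs, not something to compile on its own:

/*
 * Refcount helper from include/linux/buffer_head.h (paraphrased):
 * identical to the open-coded atomic_inc() it replaces.
 */
static inline void get_bh(struct buffer_head *bh)
{
	atomic_inc(&bh->b_count);
}

/*
 * include/linux/jbd.h instantiates BUFFER_FNS(JWrite, jwrite), which
 * generates roughly the following accessors for the BH_JWrite state bit.
 */
static inline void set_buffer_jwrite(struct buffer_head *bh)
{
	set_bit(BH_JWrite, &bh->b_state);
}

static inline void clear_buffer_jwrite(struct buffer_head *bh)
{
	clear_bit(BH_JWrite, &bh->b_state);
}

static inline int buffer_jwrite(const struct buffer_head *bh)
{
	return test_bit(BH_JWrite, &bh->b_state);
}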