From 31373d09da5b7fe21fe6f781e92bd534a3495f00 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 6 Apr 2010 14:25:14 +0200 Subject: laptop-mode: Make flushes per-device One of the features of laptop-mode is that it forces a writeout of dirty pages if something else triggers a physical read or write from a device. The current implementation flushes pages on all devices, rather than only the one that triggered the flush. This patch alters the behaviour so that only the recently accessed block device is flushed, preventing other disks being spun up for no terribly good reason. Signed-off-by: Matthew Garrett Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/backing-dev.h') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index fcbc26af00e..2742e1adfc3 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -88,6 +89,8 @@ struct backing_dev_info { struct device *dev; + struct timer_list laptop_mode_wb_timer; + #ifdef CONFIG_DEBUG_FS struct dentry *debug_dir; struct dentry *debug_stats; -- cgit v1.2.3 From e913fc825dc685a444cb4c1d0f9d32f372f59861 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 17 May 2010 12:55:07 +0200 Subject: writeback: fix WB_SYNC_NONE writeback from umount When umount calls sync_filesystem(), we first do a WB_SYNC_NONE writeback to kick off writeback of pending dirty inodes, then follow that up with a WB_SYNC_ALL to wait for it. Since umount already holds the sb s_umount mutex, WB_SYNC_NONE ends up doing nothing and all writeback happens as WB_SYNC_ALL. This can greatly slow down umount, since WB_SYNC_ALL writeback is a data integrity operation and thus a bigger hammer than simple WB_SYNC_NONE. For barrier aware file systems it's a lot slower. Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 48 ++++++++++++++++++++++++++++++++++----------- fs/sync.c | 2 +- include/linux/backing-dev.h | 2 +- include/linux/writeback.h | 10 ++++++++++ mm/page-writeback.c | 4 ++-- 5 files changed, 51 insertions(+), 15 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 760dc8d0b4f..67db89786e7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -45,6 +45,7 @@ struct wb_writeback_args { int for_kupdate:1; int range_cyclic:1; int for_background:1; + int sb_pinned:1; }; /* @@ -230,6 +231,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, .range_cyclic = 0, + /* + * Setting sb_pinned is not necessary for WB_SYNC_ALL, but + * lets make it explicitly clear. + */ + .sb_pinned = 1, }; struct bdi_work work; @@ -245,21 +251,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, * @bdi: the backing device to write from * @sb: write inodes from this super_block * @nr_pages: the number of pages to write + * @sb_locked: caller already holds sb umount sem. * * Description: * This does WB_SYNC_NONE opportunistic writeback. The IO is only * started when this function returns, we make no guarentees on - * completion. Caller need not hold sb s_umount semaphore. + * completion. Caller specifies whether sb umount sem is held already or not. * */ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages) + long nr_pages, int sb_locked) { struct wb_writeback_args args = { .sb = sb, .sync_mode = WB_SYNC_NONE, .nr_pages = nr_pages, .range_cyclic = 1, + .sb_pinned = sb_locked, }; /* @@ -577,7 +585,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, /* * Caller must already hold the ref for this */ - if (wbc->sync_mode == WB_SYNC_ALL) { + if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { WARN_ON(!rwsem_is_locked(&sb->s_umount)); return SB_NOT_PINNED; } @@ -751,6 +759,7 @@ static long wb_writeback(struct bdi_writeback *wb, .for_kupdate = args->for_kupdate, .for_background = args->for_background, .range_cyclic = args->range_cyclic, + .sb_pinned = args->sb_pinned, }; unsigned long oldest_jif; long wrote = 0; @@ -1193,6 +1202,18 @@ static void wait_sb_inodes(struct super_block *sb) iput(old_inode); } +static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) +{ + unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); + unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); + long nr_to_write; + + nr_to_write = nr_dirty + nr_unstable + + (inodes_stat.nr_inodes - inodes_stat.nr_unused); + + bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); +} + /** * writeback_inodes_sb - writeback dirty inodes from given super_block * @sb: the superblock @@ -1204,17 +1225,22 @@ static void wait_sb_inodes(struct super_block *sb) */ void writeback_inodes_sb(struct super_block *sb) { - unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); - unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - long nr_to_write; - - nr_to_write = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); - - bdi_start_writeback(sb->s_bdi, sb, nr_to_write); + __writeback_inodes_sb(sb, 0); } EXPORT_SYMBOL(writeback_inodes_sb); +/** + * writeback_inodes_sb_locked - writeback dirty inodes from given super_block + * @sb: the superblock + * + * Like writeback_inodes_sb(), except the caller already holds the + * sb umount sem. + */ +void writeback_inodes_sb_locked(struct super_block *sb) +{ + __writeback_inodes_sb(sb, 1); +} + /** * writeback_inodes_sb_if_idle - start writeback if none underway * @sb: the superblock diff --git a/fs/sync.c b/fs/sync.c index 92b228176f7..de6a4419283 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) if (wait) sync_inodes_sb(sb); else - writeback_inodes_sb(sb); + writeback_inodes_sb_locked(sb); if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, wait); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 7534979d83b..ff8bac63213 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -106,7 +106,7 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); void bdi_unregister(struct backing_dev_info *bdi); int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages); + long nr_pages, int sb_locked); int bdi_writeback_task(struct bdi_writeback *wb); int bdi_has_dirty_io(struct backing_dev_info *bdi); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index eb38a2c645f..47e1c686cb0 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -65,6 +65,15 @@ struct writeback_control { * so we use a single control to update them */ unsigned no_nrwrite_index_update:1; + + /* + * For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE, + * the writeback code will pin the sb for the caller. However, + * for eg umount, the caller does WB_SYNC_NONE but already has + * the sb pinned. If the below is set, caller already has the + * sb pinned. + */ + unsigned sb_pinned:1; }; /* @@ -73,6 +82,7 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +void writeback_inodes_sb_locked(struct super_block *); int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d0f2b3765f8..53b2fcf2d28 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping, (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS)) > background_thresh))) - bdi_start_writeback(bdi, NULL, 0); + bdi_start_writeback(bdi, NULL, 0, 0); } void set_page_dirty_balance(struct page *page, int page_mkwrite) @@ -705,7 +705,7 @@ void laptop_mode_timer_fn(unsigned long data) */ if (bdi_has_dirty_io(&q->backing_dev_info)) - bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); + bdi_start_writeback(&q->backing_dev_info, NULL, 0, nr_pages); } /* -- cgit v1.2.3 From 6423104b6a1e6f0c18be60e8c33f02d263331d5e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 21 May 2010 20:00:35 +0200 Subject: writeback: fixups for !dirty_writeback_centisecs Commit 69b62d01 fixed up most of the places where we would enter busy schedule() spins when disabling the periodic background writeback. This fixes up the sb timer so that it doesn't get hammered on with the delay disabled, and ensures that it gets rearmed if needed when /proc/sys/vm/dirty_writeback_centisecs gets modified. bdi_forker_task() also needs to check for !dirty_writeback_centisecs and use schedule() appropriately, fix that up too. Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 1 + mm/backing-dev.c | 15 ++++++++++----- mm/page-writeback.c | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ff8bac63213..e6e0cb5437e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -109,6 +109,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, long nr_pages, int sb_locked); int bdi_writeback_task(struct bdi_writeback *wb); int bdi_has_dirty_io(struct backing_dev_info *bdi); +void bdi_arm_supers_timer(void); extern spinlock_t bdi_lock; extern struct list_head bdi_list; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 707d0dc6da0..660a87a2251 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -48,7 +48,6 @@ static struct timer_list sync_supers_timer; static int bdi_sync_supers(void *); static void sync_supers_timer_fn(unsigned long); -static void arm_supers_timer(void); static void bdi_add_default_flusher_task(struct backing_dev_info *bdi); @@ -252,7 +251,7 @@ static int __init default_bdi_init(void) init_timer(&sync_supers_timer); setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0); - arm_supers_timer(); + bdi_arm_supers_timer(); err = bdi_init(&default_backing_dev_info); if (!err) @@ -374,10 +373,13 @@ static int bdi_sync_supers(void *unused) return 0; } -static void arm_supers_timer(void) +void bdi_arm_supers_timer(void) { unsigned long next; + if (!dirty_writeback_interval) + return; + next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies; mod_timer(&sync_supers_timer, round_jiffies_up(next)); } @@ -385,7 +387,7 @@ static void arm_supers_timer(void) static void sync_supers_timer_fn(unsigned long unused) { wake_up_process(sync_supers_tsk); - arm_supers_timer(); + bdi_arm_supers_timer(); } static int bdi_forker_task(void *ptr) @@ -428,7 +430,10 @@ static int bdi_forker_task(void *ptr) spin_unlock_bh(&bdi_lock); wait = msecs_to_jiffies(dirty_writeback_interval * 10); - schedule_timeout(wait); + if (wait) + schedule_timeout(wait); + else + schedule(); try_to_freeze(); continue; } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 53b2fcf2d28..0d7bbe85955 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -690,6 +690,7 @@ int dirty_writeback_centisecs_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { proc_dointvec(table, write, buffer, length, ppos); + bdi_arm_supers_timer(); return 0; } -- cgit v1.2.3