From d882962f6af2b484b62a7fb05ef959e1bf355fc4 Mon Sep 17 00:00:00 2001
From: "Matthew L. Creech"
Date: Fri, 4 Mar 2011 17:55:02 -0500
Subject: UBIFS: handle allocation failures in UBIFS write path

Running kernel 2.6.37, my PPC-based device occasionally gets an
order-2 allocation failure in UBIFS, which causes the root FS to
become unwritable:

kswapd0: page allocation failure. order:2, mode:0x4050
Call Trace:
[c787dc30] [c00085b8] show_stack+0x7c/0x194 (unreliable)
[c787dc70] [c0061aec] __alloc_pages_nodemask+0x4f0/0x57c
[c787dd00] [c0061b98] __get_free_pages+0x20/0x50
[c787dd10] [c00e4f88] ubifs_jnl_write_data+0x54/0x200
[c787dd50] [c00e82d4] do_writepage+0x94/0x198
[c787dd90] [c00675e4] shrink_page_list+0x40c/0x77c
[c787de40] [c0067de0] shrink_inactive_list+0x1e0/0x370
[c787de90] [c0068224] shrink_zone+0x2b4/0x2b8
[c787df00] [c0068854] kswapd+0x408/0x5d4
[c787dfb0] [c0037bcc] kthread+0x80/0x84
[c787dff0] [c000ef44] kernel_thread+0x4c/0x68

Similar problems were encountered last April by Tomasz Stanislawski:

http://patchwork.ozlabs.org/patch/50965/

This patch implements Artem's suggested fix: fall back to a
mutex-protected static buffer, allocated at mount time. I tested it
by forcing execution down the failure path, and didn't see any ill
effects.

Artem: massaged the patch a little, improved it so that we'd not
allocate the write reserve buffer when we are in R/O mode.

Signed-off-by: Matthew L.
Creech Signed-off-by: Artem Bityutskiy --- fs/ubifs/journal.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'fs/ubifs/journal.c') diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 914f1bd89e5..aed25e86422 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -690,7 +690,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, { struct ubifs_data_node *data; int err, lnum, offs, compr_type, out_len; - int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; + int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1; struct ubifs_inode *ui = ubifs_inode(inode); dbg_jnl("ino %lu, blk %u, len %d, key %s", @@ -698,9 +698,19 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, DBGKEY(key)); ubifs_assert(len <= UBIFS_BLOCK_SIZE); - data = kmalloc(dlen, GFP_NOFS); - if (!data) - return -ENOMEM; + data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN); + if (!data) { + /* + * Fall-back to the write reserve buffer. Note, we might be + * currently on the memory reclaim path, when the kernel is + * trying to free some memory by writing out dirty pages. The + * write reserve buffer helps us to guarantee that we are + * always able to write the data. + */ + allocated = 0; + mutex_lock(&c->write_reserve_mutex); + data = c->write_reserve_buf; + } data->ch.node_type = UBIFS_DATA_NODE; key_write(c, key, &data->key); @@ -736,7 +746,10 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, goto out_ro; finish_reservation(c); - kfree(data); + if (!allocated) + mutex_unlock(&c->write_reserve_mutex); + else + kfree(data); return 0; out_release: @@ -745,7 +758,10 @@ out_ro: ubifs_ro_mode(c, err); finish_reservation(c); out_free: - kfree(data); + if (!allocated) + mutex_unlock(&c->write_reserve_mutex); + else + kfree(data); return err; } -- cgit v1.2.3