diff options
Diffstat (limited to 'fs')
38 files changed, 394 insertions, 158 deletions
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 63039ed9576..2bc5dc644b4 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1864,6 +1864,7 @@ cleanup: kfree(psinfo); kfree(notes); kfree(fpu); + kfree(shdr4extnum); #ifdef ELF_CORE_COPY_XFPREGS kfree(xfpu); #endif diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f30ac05dbda..3b859a3e6a0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1335,6 +1335,11 @@ struct btrfs_ioctl_defrag_range_args { */ #define BTRFS_STRING_ITEM_KEY 253 +/* + * Flags for mount options. + * + * Note: don't forget to add new options to btrfs_show_options() + */ #define BTRFS_MOUNT_NODATASUM (1 << 0) #define BTRFS_MOUNT_NODATACOW (1 << 1) #define BTRFS_MOUNT_NOBARRIER (1 << 2) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index f1cbd028f7b..98c68e658a9 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -82,19 +82,16 @@ static inline struct btrfs_delayed_root *btrfs_get_delayed_root( return root->fs_info->delayed_root; } -static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( - struct inode *inode) +static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode) { - struct btrfs_delayed_node *node; struct btrfs_inode *btrfs_inode = BTRFS_I(inode); struct btrfs_root *root = btrfs_inode->root; u64 ino = btrfs_ino(inode); - int ret; + struct btrfs_delayed_node *node; -again: node = ACCESS_ONCE(btrfs_inode->delayed_node); if (node) { - atomic_inc(&node->refs); /* can be accessed */ + atomic_inc(&node->refs); return node; } @@ -102,8 +99,10 @@ again: node = radix_tree_lookup(&root->delayed_nodes_tree, ino); if (node) { if (btrfs_inode->delayed_node) { + atomic_inc(&node->refs); /* can be accessed */ + BUG_ON(btrfs_inode->delayed_node != node); spin_unlock(&root->inode_lock); - goto again; + return node; } btrfs_inode->delayed_node = node; atomic_inc(&node->refs); /* can be accessed */ @@ -113,6 +112,23 @@ again: } spin_unlock(&root->inode_lock); + return NULL; +} + +static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( + struct inode *inode) +{ + struct btrfs_delayed_node *node; + struct btrfs_inode *btrfs_inode = BTRFS_I(inode); + struct btrfs_root *root = btrfs_inode->root; + u64 ino = btrfs_ino(inode); + int ret; + +again: + node = btrfs_get_delayed_node(inode); + if (node) + return node; + node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS); if (!node) return ERR_PTR(-ENOMEM); @@ -548,19 +564,6 @@ struct btrfs_delayed_item *__btrfs_next_delayed_item( return next; } -static inline struct btrfs_delayed_node *btrfs_get_delayed_node( - struct inode *inode) -{ - struct btrfs_inode *btrfs_inode = BTRFS_I(inode); - struct btrfs_delayed_node *delayed_node; - - delayed_node = btrfs_inode->delayed_node; - if (delayed_node) - atomic_inc(&delayed_node->refs); - - return delayed_node; -} - static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root, u64 root_id) { @@ -1404,8 +1407,7 @@ end: int btrfs_inode_delayed_dir_index_count(struct inode *inode) { - struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node; - int ret = 0; + struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); if (!delayed_node) return -ENOENT; @@ -1415,11 +1417,14 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode) * a new directory index is added into the delayed node and index_cnt * is updated now. So we needn't lock the delayed node. */ - if (!delayed_node->index_cnt) + if (!delayed_node->index_cnt) { + btrfs_release_delayed_node(delayed_node); return -EINVAL; + } BTRFS_I(inode)->index_cnt = delayed_node->index_cnt; - return ret; + btrfs_release_delayed_node(delayed_node); + return 0; } void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, @@ -1613,6 +1618,57 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, inode->i_ctime.tv_nsec); } +int btrfs_fill_inode(struct inode *inode, u32 *rdev) +{ + struct btrfs_delayed_node *delayed_node; + struct btrfs_inode_item *inode_item; + struct btrfs_timespec *tspec; + + delayed_node = btrfs_get_delayed_node(inode); + if (!delayed_node) + return -ENOENT; + + mutex_lock(&delayed_node->mutex); + if (!delayed_node->inode_dirty) { + mutex_unlock(&delayed_node->mutex); + btrfs_release_delayed_node(delayed_node); + return -ENOENT; + } + + inode_item = &delayed_node->inode_item; + + inode->i_uid = btrfs_stack_inode_uid(inode_item); + inode->i_gid = btrfs_stack_inode_gid(inode_item); + btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); + inode->i_mode = btrfs_stack_inode_mode(inode_item); + inode->i_nlink = btrfs_stack_inode_nlink(inode_item); + inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); + BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); + BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); + inode->i_rdev = 0; + *rdev = btrfs_stack_inode_rdev(inode_item); + BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); + + tspec = btrfs_inode_atime(inode_item); + inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec); + inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec); + + tspec = btrfs_inode_mtime(inode_item); + inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec); + inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec); + + tspec = btrfs_inode_ctime(inode_item); + inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec); + inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec); + + inode->i_generation = BTRFS_I(inode)->generation; + BTRFS_I(inode)->index_cnt = (u64)-1; + + mutex_unlock(&delayed_node->mutex); + btrfs_release_delayed_node(delayed_node); + return 0; +} + int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index d1a6a2915c6..8d27af4bd8b 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -119,6 +119,7 @@ void btrfs_kill_delayed_inode_items(struct inode *inode); int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); +int btrfs_fill_inode(struct inode *inode, u32 *rdev); /* Used for drop dead root */ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1f61bf5b496..71cd456fdb6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4842,7 +4842,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, u64 num_bytes, u64 empty_size, u64 search_start, u64 search_end, u64 hint_byte, struct btrfs_key *ins, - int data) + u64 data) { int ret = 0; struct btrfs_root *root = orig_root->fs_info->extent_root; @@ -4869,7 +4869,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, space_info = __find_space_info(root->fs_info, data); if (!space_info) { - printk(KERN_ERR "No space info for %d\n", data); + printk(KERN_ERR "No space info for %llu\n", data); return -ENOSPC; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9f985a42987..bf0d61567f3 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1893,9 +1893,12 @@ void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) while ((node = rb_last(&ctl->free_space_offset)) != NULL) { info = rb_entry(node, struct btrfs_free_space, offset_index); - unlink_free_space(ctl, info); - kfree(info->bitmap); - kmem_cache_free(btrfs_free_space_cachep, info); + if (!info->bitmap) { + unlink_free_space(ctl, info); + kmem_cache_free(btrfs_free_space_cachep, info); + } else { + free_bitmap(ctl, info); + } if (need_resched()) { spin_unlock(&ctl->tree_lock); cond_resched(); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0a9b10c5b0a..3601f0aebdd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2509,6 +2509,11 @@ static void btrfs_read_locked_inode(struct inode *inode) int maybe_acls; u32 rdev; int ret; + bool filled = false; + + ret = btrfs_fill_inode(inode, &rdev); + if (!ret) + filled = true; path = btrfs_alloc_path(); BUG_ON(!path); @@ -2520,6 +2525,10 @@ static void btrfs_read_locked_inode(struct inode *inode) goto make_bad; leaf = path->nodes[0]; + + if (filled) + goto cache_acl; + inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); if (!leaf->map_token) @@ -2556,7 +2565,7 @@ static void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->index_cnt = (u64)-1; BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); - +cache_acl: /* * try to precache a NULL acl entry for files that don't have * any xattrs or acls @@ -2572,7 +2581,6 @@ static void btrfs_read_locked_inode(struct inode *inode) } btrfs_free_path(path); - inode_item = NULL; switch (inode->i_mode & S_IFMT) { case S_IFREG: @@ -2670,12 +2678,14 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, int ret; /* - * If root is tree root, it means this inode is used to - * store free space information. And these inodes are updated - * when committing the transaction, so they needn't delaye to - * be updated, or deadlock will occured. + * If the inode is a free space inode, we can deadlock during commit + * if we put it into the delayed code. + * + * The data relocation inode should also be directly updated + * without delay */ - if (!is_free_space_inode(root, inode)) { + if (!is_free_space_inode(root, inode) + && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_delayed_update_inode(trans, root, inode); if (!ret) btrfs_set_inode_last_trans(trans, inode); @@ -4520,6 +4530,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode_tree_add(inode); trace_btrfs_inode_new(inode); + btrfs_set_inode_last_trans(trans, inode); return inode; fail: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0bb4ebbb71b..15634d4648d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -723,6 +723,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",clear_cache"); if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) seq_puts(seq, ",user_subvol_rm_allowed"); + if (btrfs_test_opt(root, ENOSPC_DEBUG)) + seq_puts(seq, ",enospc_debug"); + if (btrfs_test_opt(root, AUTO_DEFRAG)) + seq_puts(seq, ",autodefrag"); + if (btrfs_test_opt(root, INODE_MAP_CACHE)) + seq_puts(seq, ",inode_cache"); return 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1efa56e18f9..19450bc5363 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2098,7 +2098,8 @@ int btrfs_balance(struct btrfs_root *dev_root) chunk_root->root_key.objectid, found_key.objectid, found_key.offset); - BUG_ON(ret && ret != -ENOSPC); + if (ret && ret != -ENOSPC) + goto error; key.offset = found_key.offset - 1; } ret = 0; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 9542f07d0b9..4698a5c553d 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -290,7 +290,6 @@ static int striped_read(struct inode *inode, struct ceph_inode_info *ci = ceph_inode(inode); u64 pos, this_len; int io_align, page_align; - int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ int left, pages_left; int read; struct page **page_pos; @@ -326,12 +325,11 @@ more: ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); if (ret > 0) { - int didpages = - ((pos & ~PAGE_CACHE_MASK) + ret) >> PAGE_CACHE_SHIFT; + int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; if (read < pos - off) { dout(" zero gap %llu to %llu\n", off + read, pos); - ceph_zero_page_vector_range(page_off + read, + ceph_zero_page_vector_range(page_align + read, pos - off - read, pages); } pos += ret; @@ -356,7 +354,7 @@ more: left = inode->i_size - pos; dout("zero tail %d\n", left); - ceph_zero_page_vector_range(page_off + read, left, + ceph_zero_page_vector_range(page_align + read, left, pages); read += left; } @@ -478,9 +476,6 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, else pos = *offset; - io_align = pos & ~PAGE_MASK; - buf_align = (unsigned long)data & ~PAGE_MASK; - ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); if (ret < 0) return ret; @@ -504,6 +499,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, * boundary. this isn't atomic, unfortunately. :( */ more: + io_align = pos & ~PAGE_MASK; + buf_align = (unsigned long)data & ~PAGE_MASK; len = left; if (file->f_flags & O_DIRECT) { /* write from beginning of first page, regardless of @@ -593,6 +590,7 @@ out: pos += len; written += len; left -= len; + data += written; if (left) goto more; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 35f9154615f..3e298997629 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -649,9 +649,9 @@ cifs_do_mount(struct file_system_type *fs_type, cFYI(1, "Devname: %s flags: %d ", dev_name, flags); - rc = cifs_setup_volume_info(&volume_info, (char *)data, dev_name); - if (rc) - return ERR_PTR(rc); + volume_info = cifs_get_volume_info((char *)data, dev_name); + if (IS_ERR(volume_info)) + return ERR_CAST(volume_info); cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL); if (cifs_sb == NULL) { @@ -713,7 +713,7 @@ cifs_do_mount(struct file_system_type *fs_type, out_super: deactivate_locked_super(sb); out: - cifs_cleanup_volume_info(&volume_info); + cifs_cleanup_volume_info(volume_info); return root; out_mountdata: diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 257f312ede4..8df28e925e5 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -154,9 +154,9 @@ extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, struct cifs_sb_info *cifs_sb); extern int cifs_match_super(struct super_block *, void *); -extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info); -extern int cifs_setup_volume_info(struct smb_vol **pvolume_info, - char *mount_data, const char *devname); +extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info); +extern struct smb_vol *cifs_get_volume_info(char *mount_data, + const char *devname); extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); extern void cifs_umount(struct cifs_sb_info *); extern void cifs_dfs_release_automount_timer(void); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7f540df5252..dbd669cc5bc 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -65,6 +65,8 @@ static int ip_connect(struct TCP_Server_Info *server); static int generic_ip_connect(struct TCP_Server_Info *server); static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink); static void cifs_prune_tlinks(struct work_struct *work); +static int cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, + const char *devname); /* * cifs tcp session reconnection @@ -2240,8 +2242,8 @@ cifs_match_super(struct super_block *sb, void *data) rc = compare_mount_options(sb, mnt_data); out: - cifs_put_tlink(tlink); spin_unlock(&cifs_tcp_ses_lock); + cifs_put_tlink(tlink); return rc; } @@ -2474,14 +2476,6 @@ generic_ip_connect(struct TCP_Server_Info *server) if (rc < 0) return rc; - rc = socket->ops->connect(socket, saddr, slen, 0); - if (rc < 0) { - cFYI(1, "Error %d connecting to server", rc); - sock_release(socket); - server->ssocket = NULL; - return rc; - } - /* * Eventually check for other socket options to change from * the default. sock_setsockopt not used because it expects @@ -2510,6 +2504,14 @@ generic_ip_connect(struct TCP_Server_Info *server) socket->sk->sk_sndbuf, socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo); + rc = socket->ops->connect(socket, saddr, slen, 0); + if (rc < 0) { + cFYI(1, "Error %d connecting to server", rc); + sock_release(socket); + server->ssocket = NULL; + return rc; + } + if (sport == htons(RFC1001_PORT)) rc = ip_rfc1001_connect(server); @@ -2830,15 +2832,9 @@ is_path_accessible(int xid, struct cifs_tcon *tcon, return rc; } -void -cifs_cleanup_volume_info(struct smb_vol **pvolume_info) +static void +cleanup_volume_info_contents(struct smb_vol *volume_info) { - struct smb_vol *volume_info; - - if (!pvolume_info || !*pvolume_info) - return; - - volume_info = *pvolume_info; kfree(volume_info->username); kzfree(volume_info->password); kfree(volume_info->UNC); @@ -2846,28 +2842,44 @@ cifs_cleanup_volume_info(struct smb_vol **pvolume_info) kfree(volume_info->domainname); kfree(volume_info->iocharset); kfree(volume_info->prepath); +} + +void +cifs_cleanup_volume_info(struct smb_vol *volume_info) +{ + if (!volume_info) + return; + cleanup_volume_info_contents(volume_info); kfree(volume_info); - *pvolume_info = NULL; - return; } + #ifdef CONFIG_CIFS_DFS_UPCALL /* build_path_to_root returns full path to root when * we do not have an exiting connection (tcon) */ static char * -build_unc_path_to_root(const struct smb_vol *volume_info, +build_unc_path_to_root(const struct smb_vol *vol, const struct cifs_sb_info *cifs_sb) { - char *full_path; + char *full_path, *pos; + unsigned int pplen = vol->prepath ? strlen(vol->prepath) : 0; + unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1); - int unc_len = strnlen(volume_info->UNC, MAX_TREE_SIZE + 1); - full_path = kmalloc(unc_len + 1, GFP_KERNEL); + full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL); if (full_path == NULL) return ERR_PTR(-ENOMEM); - strncpy(full_path, volume_info->UNC, unc_len); - full_path[unc_len] = 0; /* add trailing null */ + strncpy(full_path, vol->UNC, unc_len); + pos = full_path + unc_len; + + if (pplen) { + strncpy(pos, vol->prepath, pplen); + pos += pplen; + } + + *pos = '\0'; /* add trailing null */ convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); + cFYI(1, "%s: full_path=%s", __func__, full_path); return full_path; } @@ -2910,15 +2922,18 @@ expand_dfs_referral(int xid, struct cifs_ses *pSesInfo, &fake_devname); free_dfs_info_array(referrals, num_referrals); - kfree(fake_devname); - - if (cifs_sb->mountdata != NULL) - kfree(cifs_sb->mountdata); if (IS_ERR(mdata)) { rc = PTR_ERR(mdata); mdata = NULL; + } else { + cleanup_volume_info_contents(volume_info); + memset(volume_info, '\0', sizeof(*volume_info)); + rc = cifs_setup_volume_info(volume_info, mdata, + fake_devname); } + kfree(fake_devname); + kfree(cifs_sb->mountdata); cifs_sb->mountdata = mdata; } kfree(full_path); @@ -2926,33 +2941,20 @@ expand_dfs_referral(int xid, struct cifs_ses *pSesInfo, } #endif -int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, - const char *devname) +static int +cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, + const char *devname) { - struct smb_vol *volume_info; int rc = 0; - *pvolume_info = NULL; - - volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL); - if (!volume_info) { - rc = -ENOMEM; - goto out; - } - - if (cifs_parse_mount_options(mount_data, devname, - volume_info)) { - rc = -EINVAL; - goto out; - } + if (cifs_parse_mount_options(mount_data, devname, volume_info)) + return -EINVAL; if (volume_info->nullauth) { cFYI(1, "null user"); volume_info->username = kzalloc(1, GFP_KERNEL); - if (volume_info->username == NULL) { - rc = -ENOMEM; - goto out; - } + if (volume_info->username == NULL) + return -ENOMEM; } else if (volume_info->username) { /* BB fixme parse for domain name here */ cFYI(1, "Username: %s", volume_info->username); @@ -2960,8 +2962,7 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, cifserror("No username specified"); /* In userspace mount helper we can get user name from alternate locations such as env variables and files on disk */ - rc = -EINVAL; - goto out; + return -EINVAL; } /* this is needed for ASCII cp to Unicode converts */ @@ -2973,18 +2974,32 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, if (volume_info->local_nls == NULL) { cERROR(1, "CIFS mount error: iocharset %s not found", volume_info->iocharset); - rc = -ELIBACC; - goto out; + return -ELIBACC; } } - *pvolume_info = volume_info; - return rc; -out: - cifs_cleanup_volume_info(&volume_info); return rc; } +struct smb_vol * +cifs_get_volume_info(char *mount_data, const char *devname) +{ + int rc; + struct smb_vol *volume_info; + + volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL); + if (!volume_info) + return ERR_PTR(-ENOMEM); + + rc = cifs_setup_volume_info(volume_info, mount_data, devname); + if (rc) { + cifs_cleanup_volume_info(volume_info); + volume_info = ERR_PTR(rc); + } + + return volume_info; +} + int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) { @@ -2997,6 +3012,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) struct tcon_link *tlink; #ifdef CONFIG_CIFS_DFS_UPCALL int referral_walks_count = 0; +#endif rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); if (rc) @@ -3004,6 +3020,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; +#ifdef CONFIG_CIFS_DFS_UPCALL try_mount_again: /* cleanup activities if we're chasing a referral */ if (referral_walks_count) { @@ -3012,7 +3029,6 @@ try_mount_again: else if (pSesInfo) cifs_put_smb_ses(pSesInfo); - cifs_cleanup_volume_info(&volume_info); FreeXid(xid); } #endif diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 816696621ec..42e5363b410 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -92,6 +92,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode) if (cifsi->fscache) { cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache); + fscache_uncache_all_inode_pages(cifsi->fscache, inode); fscache_relinquish_cookie(cifsi->fscache, 1); cifsi->fscache = NULL; } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index a2a5d19ece6..2f343b4d7a7 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -954,3 +954,47 @@ void fscache_mark_pages_cached(struct fscache_retrieval *op, pagevec_reinit(pagevec); } EXPORT_SYMBOL(fscache_mark_pages_cached); + +/* + * Uncache all the pages in an inode that are marked PG_fscache, assuming them + * to be associated with the given cookie. + */ +void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, + struct inode *inode) +{ + struct address_space *mapping = inode->i_mapping; + struct pagevec pvec; + pgoff_t next; + int i; + + _enter("%p,%p", cookie, inode); + + if (!mapping || mapping->nrpages == 0) { + _leave(" [no pages]"); + return; + } + + pagevec_init(&pvec, 0); + next = 0; + while (next <= (loff_t)-1 && + pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE) + ) { + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + pgoff_t page_index = page->index; + + ASSERTCMP(page_index, >=, next); + next = page_index + 1; + + if (PageFsCache(page)) { + __fscache_wait_on_page_write(cookie, page); + __fscache_uncache_page(cookie, page); + } + } + pagevec_release(&pvec); + cond_resched(); + } + + _leave(""); +} +EXPORT_SYMBOL(__fscache_uncache_all_inode_pages); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 802ac5eeba2..f9fbbe96c22 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1069,6 +1069,7 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) return 0; gfs2_log_lock(sdp); + spin_lock(&sdp->sd_ail_lock); head = bh = page_buffers(page); do { if (atomic_read(&bh->b_count)) @@ -1080,6 +1081,7 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) goto not_possible; bh = bh->b_this_page; } while(bh != head); + spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); head = bh = page_buffers(page); @@ -1112,6 +1114,7 @@ not_possible: /* Should never happen */ WARN_ON(buffer_dirty(bh)); WARN_ON(buffer_pinned(bh)); cannot_release: + spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); return 0; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 8ef70f46473..2cca29316bd 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -47,10 +47,10 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl) bd_ail_gl_list); bh = bd->bd_bh; gfs2_remove_from_ail(bd); - spin_unlock(&sdp->sd_ail_lock); - bd->bd_bh = NULL; bh->b_private = NULL; + spin_unlock(&sdp->sd_ail_lock); + bd->bd_blkno = bh->b_blocknr; gfs2_log_lock(sdp); gfs2_assert_withdraw(sdp, !buffer_busy(bh)); @@ -221,8 +221,10 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) } } - if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) + if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) { + gfs2_log_flush(gl->gl_sbd, NULL); gl->gl_sbd->sd_rindex_uptodate = 0; + } if (ip && S_ISREG(ip->i_inode.i_mode)) truncate_inode_pages(ip->i_inode.i_mapping, 0); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0a064e91ac7..81206e70cbf 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -17,6 +17,7 @@ #include <linux/buffer_head.h> #include <linux/rcupdate.h> #include <linux/rculist_bl.h> +#include <linux/completion.h> #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -546,6 +547,7 @@ struct gfs2_sbd { struct gfs2_glock *sd_trans_gl; wait_queue_head_t sd_glock_wait; atomic_t sd_glock_disposal; + struct completion sd_locking_init; /* Inode Stuff */ diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 903115f2bb3..85c62923ee2 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -903,6 +903,7 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp) if (gfs2_ail1_empty(sdp)) break; } + gfs2_log_flush(sdp, NULL); } static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 8ac9ae189b5..2a77071fb7b 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -72,6 +72,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_waitqueue_head(&sdp->sd_glock_wait); atomic_set(&sdp->sd_glock_disposal, 0); + init_completion(&sdp->sd_locking_init); spin_lock_init(&sdp->sd_statfs_spin); spin_lock_init(&sdp->sd_rindex_spin); @@ -1017,11 +1018,13 @@ hostdata_error: fsname++; if (lm->lm_mount == NULL) { fs_info(sdp, "Now mounting FS...\n"); + complete(&sdp->sd_locking_init); return 0; } ret = lm->lm_mount(sdp, fsname); if (ret == 0) fs_info(sdp, "Joined cluster. Now mounting FS...\n"); + complete(&sdp->sd_locking_init); return ret; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ed540e7018b..fb0edf73548 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -757,13 +757,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) struct timespec atime; struct gfs2_dinode *di; int ret = -EAGAIN; + int unlock_required = 0; /* Skip timestamp update, if this is from a memalloc */ if (current->flags & PF_MEMALLOC) goto do_flush; - ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - if (ret) - goto do_flush; + if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { + ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (ret) + goto do_flush; + unlock_required = 1; + } ret = gfs2_trans_begin(sdp, RES_DINODE, 0); if (ret) goto do_unlock; @@ -780,7 +784,8 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) } gfs2_trans_end(sdp); do_unlock: - gfs2_glock_dq_uninit(&gh); + if (unlock_required) + gfs2_glock_dq_uninit(&gh); do_flush: if (wbc->sync_mode == WB_SYNC_ALL) gfs2_log_flush(GFS2_SB(inode), ip->i_gl); @@ -1427,7 +1432,20 @@ out: return error; } -/* +/** + * gfs2_evict_inode - Remove an inode from cache + * @inode: The inode to evict + * + * There are three cases to consider: + * 1. i_nlink == 0, we are final opener (and must deallocate) + * 2. i_nlink == 0, we are not the final opener (and cannot deallocate) + * 3. i_nlink > 0 + * + * If the fs is read only, then we have to treat all cases as per #3 + * since we are unable to do any deallocation. The inode will be + * deallocated by the next read/write node to attempt an allocation + * in the same resource group + * * We have to (at the moment) hold the inodes main lock to cover * the gap between unlocking the shared lock on the iopen lock and * taking the exclusive lock. I'd rather do a shared -> exclusive @@ -1470,6 +1488,8 @@ static void gfs2_evict_inode(struct inode *inode) if (error) goto out_truncate; + /* Case 1 starts here */ + if (S_ISDIR(inode->i_mode) && (ip->i_diskflags & GFS2_DIF_EXHASH)) { error = gfs2_dir_exhash_dealloc(ip); @@ -1493,13 +1513,16 @@ static void gfs2_evict_inode(struct inode *inode) goto out_unlock; out_truncate: + /* Case 2 starts here */ error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); if (error) goto out_unlock; - gfs2_final_release_pages(ip); + /* Needs to be done before glock release & also in a transaction */ + truncate_inode_pages(&inode->i_data, 0); gfs2_trans_end(sdp); out_unlock: + /* Error path for case 1 */ if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) gfs2_glock_dq(&ip->i_iopen_gh); gfs2_holder_uninit(&ip->i_iopen_gh); @@ -1507,6 +1530,7 @@ out_unlock: if (error && error != GLR_TRYFAILED && error != -EROFS) fs_warn(sdp, "gfs2_evict_inode: %d\n", error); out: + /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index e20eab37bc8..443cabcfcd2 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -338,6 +338,9 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len) rv = sscanf(buf, "%u", &first); if (rv != 1 || first > 1) return -EINVAL; + rv = wait_for_completion_killable(&sdp->sd_locking_init); + if (rv) + return rv; spin_lock(&sdp->sd_jindex_spin); rv = -EBUSY; if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) @@ -414,7 +417,9 @@ static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) rv = sscanf(buf, "%d", &jid); if (rv != 1) return -EINVAL; - + rv = wait_for_completion_killable(&sdp->sd_locking_init); + if (rv) + return rv; spin_lock(&sdp->sd_jindex_spin); rv = -EINVAL; if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index b49b55584c8..84a47b709f5 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -500,7 +500,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) out_put_hidden_dir: iput(sbi->hidden_dir); out_put_root: - iput(sbi->alloc_file); + iput(root); out_put_alloc_file: iput(sbi->alloc_file); out_close_cat_tree: diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 3031d81f5f0..4ac88ff79aa 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -36,6 +36,7 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, { DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; + int ret = 0; bio = bio_alloc(GFP_NOIO, 1); bio->bi_sector = sector; @@ -54,8 +55,10 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, wait_for_completion(&wait); if (!bio_flagged(bio, BIO_UPTODATE)) - return -EIO; - return 0; + ret = -EIO; + + bio_put(bio); + return ret; } static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) diff --git a/fs/inode.c b/fs/inode.c index 0f7e88a7803..43566d17d1b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -423,7 +423,14 @@ EXPORT_SYMBOL(remove_inode_hash); void end_writeback(struct inode *inode) { might_sleep(); + /* + * We have to cycle tree_lock here because reclaim can be still in the + * process of removing the last page (in __delete_from_page_cache()) + * and we must not free mapping under it. + */ + spin_lock_irq(&inode->i_data.tree_lock); BUG_ON(inode->i_data.nrpages); + spin_unlock_irq(&inode->i_data.tree_lock); BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); diff --git a/fs/locks.c b/fs/locks.c index 0a4f50dfadf..b286539d547 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -160,10 +160,28 @@ EXPORT_SYMBOL_GPL(unlock_flocks); static struct kmem_cache *filelock_cache __read_mostly; +static void locks_init_lock_always(struct file_lock *fl) +{ + fl->fl_next = NULL; + fl->fl_fasync = NULL; + fl->fl_owner = NULL; + fl->fl_pid = 0; + fl->fl_nspid = NULL; + fl->fl_file = NULL; + fl->fl_flags = 0; + fl->fl_type = 0; + fl->fl_start = fl->fl_end = 0; +} + /* Allocate an empty lock structure. */ struct file_lock *locks_alloc_lock(void) { - return kmem_cache_alloc(filelock_cache, GFP_KERNEL); + struct file_lock *fl = kmem_cache_alloc(filelock_cache, GFP_KERNEL); + + if (fl) + locks_init_lock_always(fl); + + return fl; } EXPORT_SYMBOL_GPL(locks_alloc_lock); @@ -200,17 +218,9 @@ void locks_init_lock(struct file_lock *fl) INIT_LIST_HEAD(&fl->fl_link); INIT_LIST_HEAD(&fl->fl_block); init_waitqueue_head(&fl->fl_wait); - fl->fl_next = NULL; - fl->fl_fasync = NULL; - fl->fl_owner = NULL; - fl->fl_pid = 0; - fl->fl_nspid = NULL; - fl->fl_file = NULL; - fl->fl_flags = 0; - fl->fl_type = 0; - fl->fl_start = fl->fl_end = 0; fl->fl_ops = NULL; fl->fl_lmops = NULL; + locks_init_lock_always(fl); } EXPORT_SYMBOL(locks_init_lock); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index ce153a6b3ae..419119c371b 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -259,12 +259,10 @@ static void nfs_fscache_disable_inode_cookie(struct inode *inode) dfprintk(FSCACHE, "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); - /* Need to invalidate any mapped pages that were read in before - * turning off the cache. + /* Need to uncache any pages attached to this inode that + * fscache knows about before turning off the cache. */ - if (inode->i_mapping && inode->i_mapping->nrpages) - invalidate_inode_pages2(inode->i_mapping); - + fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode); nfs_fscache_zap_inode_cookie(inode); } } diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 0bafcc91c27..f9d03abcd04 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -398,7 +398,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) * this offset and save the original offset. */ data->args.offset = filelayout_get_dserver_offset(lseg, offset); - data->mds_offset = offset; /* Perform an asynchronous write */ status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6870bc61cee..e6e8f3b9a1d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -91,7 +91,7 @@ static int nfs4_stat_to_errno(int); #define encode_getfh_maxsz (op_encode_hdr_maxsz) #define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ ((3+NFS4_FHSIZE) >> 2)) -#define nfs4_fattr_bitmap_maxsz 3 +#define nfs4_fattr_bitmap_maxsz 4 #define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e268e3b2349..72716805968 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -864,6 +864,8 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; + /* pnfs_set_layoutcommit needs this */ + data->mds_offset = data->args.offset; data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; diff --git a/fs/proc/base.c b/fs/proc/base.c index 8a84210ca08..fc5bc276769 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2708,6 +2708,9 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) struct task_io_accounting acct = task->ioac; unsigned long flags; + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + return -EACCES; + if (whole && lock_task_sighand(task, &flags)) { struct task_struct *t = task; @@ -2839,7 +2842,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, proc_tgid_io_accounting), + INF("io", S_IRUSR, proc_tgid_io_accounting), #endif #ifdef CONFIG_HARDWALL INF("hardwall", S_IRUGO, proc_pid_hardwall), @@ -3181,7 +3184,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, proc_tid_io_accounting), + INF("io", S_IRUSR, proc_tid_io_accounting), #endif #ifdef CONFIG_HARDWALL INF("hardwall", S_IRUGO, proc_pid_hardwall), diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c index f0511e81696..eed99428f10 100644 --- a/fs/romfs/mmap-nommu.c +++ b/fs/romfs/mmap-nommu.c @@ -27,14 +27,18 @@ static unsigned long romfs_get_unmapped_area(struct file *file, { struct inode *inode = file->f_mapping->host; struct mtd_info *mtd = inode->i_sb->s_mtd; - unsigned long isize, offset; + unsigned long isize, offset, maxpages, lpages; if (!mtd) goto cant_map_directly; + /* the mapping mustn't extend beyond the EOF */ + lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; isize = i_size_read(inode); offset = pgoff << PAGE_SHIFT; - if (offset > isize || len > isize || offset > isize - len) + + maxpages = (isize + PAGE_SIZE - 1) >> PAGE_SHIFT; + if ((pgoff >= maxpages) || (maxpages - pgoff < lpages)) return (unsigned long) -EINVAL; /* we need to call down to the MTD layer to do the actual mapping */ diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index c8637537881..01d2072fb6d 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -490,6 +490,13 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) args.whichfork = XFS_ATTR_FORK; /* + * we have no control over the attribute names that userspace passes us + * to remove, so we have to allow the name lookup prior to attribute + * removal to fail. + */ + args.op_flags = XFS_DA_OP_OKNOENT; + + /* * Attach the dquots to the inode. */ error = xfs_qm_dqattach(dp, 0); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index cb9b6d1469f..3631783b2b5 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -253,16 +253,21 @@ xfs_iget_cache_hit( rcu_read_lock(); spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~XFS_INEW; - ip->i_flags |= XFS_IRECLAIMABLE; - __xfs_inode_set_reclaim_tag(pag, ip); + ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); + ASSERT(ip->i_flags & XFS_IRECLAIMABLE); trace_xfs_iget_reclaim_fail(ip); goto out_error; } spin_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); + + /* + * Clear the per-lifetime state in the inode as we are now + * effectively a new inode and need to return to the initial + * state before reuse occurs. + */ + ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; ip->i_flags |= XFS_INEW; __xfs_inode_clear_reclaim_tag(mp, pag, ip); inode->i_state = I_NEW; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3ae6d58e547..964cfea7768 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -384,6 +384,16 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) #define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ /* + * Per-lifetime flags need to be reset when re-using a reclaimable inode during + * inode lookup. Thi prevents unintended behaviour on the new inode from + * ocurring. + */ +#define XFS_IRECLAIM_RESET_FLAGS \ + (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ + XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ + XFS_IFILESTREAM); + +/* * Flags for inode locking. * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) * 1<<16 - 1<<32-1 -- lockdep annotation (integers) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 09983a3344a..b1e88d56069 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -681,15 +681,15 @@ xfs_inode_item_unlock( * where the cluster buffer may be unpinned before the inode is inserted into * the AIL during transaction committed processing. If the buffer is unpinned * before the inode item has been committed and inserted, then it is possible - * for the buffer to be written and IO completions before the inode is inserted + * for the buffer to be written and IO completes before the inode is inserted * into the AIL. In that case, we'd be inserting a clean, stale inode into the * AIL which will never get removed. It will, however, get reclaimed which * triggers an assert in xfs_inode_free() complaining about freein an inode * still in the AIL. * - * To avoid this, return a lower LSN than the one passed in so that the - * transaction committed code will not move the inode forward in the AIL but - * will still unpin it properly. + * To avoid this, just unpin the inode directly and return a LSN of -1 so the + * transaction committed code knows that it does not need to do any further + * processing on the item. */ STATIC xfs_lsn_t xfs_inode_item_committed( @@ -699,8 +699,10 @@ xfs_inode_item_committed( struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - if (xfs_iflags_test(ip, XFS_ISTALE)) - return lsn - 1; + if (xfs_iflags_test(ip, XFS_ISTALE)) { + xfs_inode_item_unpin(lip, 0); + return -1; + } return lsn; } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 7c7bc2b786b..c83f63b33aa 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1361,7 +1361,7 @@ xfs_trans_item_committed( lip->li_flags |= XFS_LI_ABORTED; item_lsn = IOP_COMMITTED(lip, commit_lsn); - /* If the committed routine returns -1, item has been freed. */ + /* item_lsn of -1 means the item needs no further processing */ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) return; @@ -1474,7 +1474,7 @@ xfs_trans_committed_bulk( lip->li_flags |= XFS_LI_ABORTED; item_lsn = IOP_COMMITTED(lip, commit_lsn); - /* item_lsn of -1 means the item was freed */ + /* item_lsn of -1 means the item needs no further processing */ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) continue; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index b7a5fe7c52c..619720705bc 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -960,8 +960,11 @@ xfs_release( * be exposed to that problem. */ truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); - if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) - xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); + if (truncated) { + xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); + if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) + xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); + } } if (ip->i_d.di_nlink == 0) |