From b93d87c19821ba7d3ee11557403d782e541071ad Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 16:37:57 -0500 Subject: nfsd4: fix lockowner matching Lockowners are looked up by file as well as by owner, but we were forgetting to do a comparison on the file. This could cause an incorrect result from lockt. (Note looking up the inode from the lockowner is pretty awkward here. The data structures need fixing.) Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 47e94e33a97..5abced7a740 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3809,16 +3809,29 @@ nevermind: deny->ld_type = NFS4_WRITE_LT; } +static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) +{ + struct nfs4_ol_stateid *lst; + + if (!same_owner_str(&lo->lo_owner, owner, clid)) + return false; + lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return lst->st_file->fi_inode == inode; +} + static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); + struct nfs4_lockowner *lo; struct nfs4_stateowner *op; list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { - if (same_owner_str(op, owner, clid)) - return lockowner(op); + lo = lockowner(op); + if (same_lockowner_ino(lo, inode, clid, owner)) + return lo; } return NULL; } -- cgit v1.2.3 From 684e563858018d27acb8f00e30c026215bbd0ffb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 4 Nov 2011 17:08:10 -0400 Subject: nfsd4: cleanup lock clientid handling in sessions case I'd rather the "ignore clientid in sessions case" rule be enforced in just one place. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5abced7a740..9354ddebbee 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3946,10 +3946,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * lock stateid. */ struct nfs4_ol_stateid *open_stp = NULL; - + + if (nfsd4_has_session(cstate)) + /* See rfc 5661 18.10.3: given clientid is ignored: */ + memcpy(&lock->v.new.clientid, + &cstate->session->se_client->cl_clientid, + sizeof(clientid_t)); + status = nfserr_stale_clientid; - if (!nfsd4_has_session(cstate) && - STALE_CLIENTID(&lock->lk_new_clientid)) + if (STALE_CLIENTID(&lock->lk_new_clientid)) goto out; /* validate and update open stateid and open seqid */ @@ -3961,8 +3966,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; - if (!nfsd4_has_session(cstate) && - !same_clid(&open_sop->oo_owner.so_client->cl_clientid, + if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, &lock->v.new.clientid)) goto out; /* create lockowner and lock stateid */ -- cgit v1.2.3 From 64a284d07c7d84299a90826950079a8ef11e8204 Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Thu, 20 Oct 2011 06:57:46 -0400 Subject: nfsd4: maintain one seqid stream per (lockowner, file) Instead of creating a new lockowner and stateid for every open_to_lockowner call, reuse the existing lockowner if it exists. Reported-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 58 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9354ddebbee..d09d5242c08 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3905,6 +3905,37 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) __set_bit(access, &lock_stp->st_access_bmap); } +__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +{ + struct nfs4_file *fi = ost->st_file; + struct nfs4_openowner *oo = openowner(ost->st_stateowner); + struct nfs4_client *cl = oo->oo_owner.so_client; + struct nfs4_lockowner *lo; + unsigned int strhashval; + + lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner); + if (lo) { + if (!cstate->minorversion) + return nfserr_bad_seqid; + /* XXX: a lockowner always has exactly one stateid: */ + *lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return nfs_ok; + } + strhashval = lock_ownerstr_hashval(fi->fi_inode, cl->cl_clientid.cl_id, + &lock->v.new.owner); + lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); + if (lo == NULL) + return nfserr_jukebox; + *lst = alloc_init_lock_stateid(lo, fi, ost); + if (*lst == NULL) { + release_lockowner(lo); + return nfserr_jukebox; + } + *new = true; + return nfs_ok; +} + /* * LOCK operation */ @@ -3920,7 +3951,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file_lock file_lock; struct file_lock conflock; __be32 status = 0; - unsigned int strhashval; + bool new_state = false; int lkflg; int err; @@ -3969,21 +4000,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, &lock->v.new.clientid)) goto out; - /* create lockowner and lock stateid */ - fp = open_stp->st_file; - strhashval = lock_ownerstr_hashval(fp->fi_inode, - open_sop->oo_owner.so_client->cl_clientid.cl_id, - &lock->v.new.owner); - /* XXX: Do we need to check for duplicate stateowners on - * the same file, or should they just be allowed (and - * create new stateids)? 
*/ - status = nfserr_jukebox; - lock_sop = alloc_init_lock_stateowner(strhashval, - open_sop->oo_owner.so_client, open_stp, lock); - if (lock_sop == NULL) - goto out; - lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); - if (lock_stp == NULL) + status = lookup_or_create_lock_state(cstate, open_stp, lock, + &lock_stp, &new_state); + if (status) goto out; } else { /* lock (lock owner + lock stateid) already exists */ @@ -3993,10 +4012,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, NFS4_LOCK_STID, &lock_stp); if (status) goto out; - lock_sop = lockowner(lock_stp->st_stateowner); - fp = lock_stp->st_file; } - /* lock_sop and lock_stp have been created or found */ + lock_sop = lockowner(lock_stp->st_stateowner); + fp = lock_stp->st_file; lkflg = setlkflg(lock->lk_type); status = nfs4_check_openmode(lock_stp, lkflg); @@ -4071,7 +4089,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; } out: - if (status && lock->lk_is_new && lock_sop) + if (status && new_state) release_lockowner(lock_sop); if (!cstate->replay_owner) nfs4_unlock_state(); -- cgit v1.2.3 From 65178db42a02c7984f711614546e97e9952d8e01 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 13:35:21 -0400 Subject: NFSD: Added fault injection Fault injection on the NFS server makes it easier to test the client's state manager and recovery threads. Simulating errors on the server is easier than finding the right conditions that cause them naturally. This patch uses debugfs to add a simple framework for fault injection to the server. This framework is a config option, and can be enabled through CONFIG_NFSD_FAULT_INJECTION. Assuming you have debugfs mounted to /sys/debug, a set of files will be created in /sys/debug/nfsd/. Writing to any of these files will cause the corresponding action and write a log entry to dmesg. Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields --- fs/nfsd/Kconfig | 10 +++++ fs/nfsd/Makefile | 1 + fs/nfsd/fault_inject.c | 91 ++++++++++++++++++++++++++++++++++++++ fs/nfsd/fault_inject.h | 28 ++++++++++++ fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfsctl.c | 7 +++ 6 files changed, 252 insertions(+) create mode 100644 fs/nfsd/fault_inject.c create mode 100644 fs/nfsd/fault_inject.h (limited to 'fs') diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 10e6366608f..8df1ea4a6ff 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -80,3 +80,13 @@ config NFSD_V4 available from http://linux-nfs.org/. If unsure, say N. + +config NFSD_FAULT_INJECTION + bool "NFS server manual fault injection" + depends on NFSD_V4 && DEBUG_KERNEL + help + This option enables support for manually injecting faults + into the NFS server. This is intended to be used for + testing error recovery on the NFS client. + + If unsure, say N. 
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 9b118ee2019..af32ef06b4f 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_NFSD) += nfsd.o nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c new file mode 100644 index 00000000000..ce7f0758d84 --- /dev/null +++ b/fs/nfsd/fault_inject.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2011 Bryan Schumaker + * + * Uses debugfs to create fault injection points for client testing + */ + +#include +#include +#include +#include + +#include "state.h" +#include "fault_inject.h" + +struct nfsd_fault_inject_op { + char *file; + void (*func)(u64); +}; + +static struct nfsd_fault_inject_op inject_ops[] = { + { + .file = "forget_clients", + .func = nfsd_forget_clients, + }, + { + .file = "forget_locks", + .func = nfsd_forget_locks, + }, + { + .file = "forget_openowners", + .func = nfsd_forget_openowners, + }, + { + .file = "forget_delegations", + .func = nfsd_forget_delegations, + }, + { + .file = "recall_delegations", + .func = nfsd_recall_delegations, + }, +}; + +static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); +static struct dentry *debug_dir; + +static int nfsd_inject_set(void *op_ptr, u64 val) +{ + struct nfsd_fault_inject_op *op = op_ptr; + + if (val == 0) + printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); + else + printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); + + op->func(val); + return 0; +} + +static int nfsd_inject_get(void *data, u64 *val) +{ + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n"); + +void nfsd_fault_inject_cleanup(void) +{ + debugfs_remove_recursive(debug_dir); +} + +int nfsd_fault_inject_init(void) +{ + unsigned int i; + struct nfsd_fault_inject_op *op; + mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + + debug_dir = debugfs_create_dir("nfsd", NULL); + if (!debug_dir) + goto fail; + + for (i = 0; i < NUM_INJECT_OPS; i++) { + op = &inject_ops[i]; + if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) + goto fail; + } + return 0; + +fail: + nfsd_fault_inject_cleanup(); + return -ENOMEM; +} diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h new file mode 100644 index 00000000000..90bd0570956 --- /dev/null +++ b/fs/nfsd/fault_inject.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2011 Bryan Schumaker + * + * Function definitions for fault injection + */ + +#ifndef LINUX_NFSD_FAULT_INJECT_H +#define LINUX_NFSD_FAULT_INJECT_H + +#ifdef CONFIG_NFSD_FAULT_INJECTION +int nfsd_fault_inject_init(void); +void nfsd_fault_inject_cleanup(void); +void nfsd_forget_clients(u64); +void nfsd_forget_locks(u64); +void nfsd_forget_openowners(u64); +void nfsd_forget_delegations(u64); +void nfsd_recall_delegations(u64); +#else /* CONFIG_NFSD_FAULT_INJECTION */ +static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void nfsd_fault_inject_cleanup(void) {} +static inline void nfsd_forget_clients(u64 num) {} +static inline void nfsd_forget_locks(u64 num) {} +static inline void nfsd_forget_openowners(u64 num) {} +static inline void nfsd_forget_delegations(u64 num) {} +static inline void nfsd_recall_delegations(u64 num) {} +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + +#endif /* 
LINUX_NFSD_FAULT_INJECT_H */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d09d5242c08..a511eff0569 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4429,6 +4429,121 @@ nfs4_check_open_reclaim(clientid_t *clid) return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; } +#ifdef CONFIG_NFSD_FAULT_INJECTION + +void nfsd_forget_clients(u64 num) +{ + struct nfs4_client *clp, *next; + int count = 0; + + nfs4_lock_state(); + list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { + nfsd4_remove_clid_dir(clp); + expire_client(clp); + if (++count == num) + break; + } + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d clients", count); +} + +static void release_lockowner_sop(struct nfs4_stateowner *sop) +{ + release_lockowner(lockowner(sop)); +} + +static void release_openowner_sop(struct nfs4_stateowner *sop) +{ + release_openowner(openowner(sop)); +} + +static int nfsd_release_n_owners(u64 num, + struct list_head hashtbl[], + unsigned int hashtbl_size, + void (*release_sop)(struct nfs4_stateowner *)) +{ + int i, count = 0; + struct nfs4_stateowner *sop, *next; + + for (i = 0; i < hashtbl_size; i++) { + list_for_each_entry_safe(sop, next, &hashtbl[i], so_strhash) { + release_sop(sop); + if (++count == num) + return count; + } + } + return count; +} + +void nfsd_forget_locks(u64 num) +{ + int count; + + nfs4_lock_state(); + count = nfsd_release_n_owners(num, lock_ownerstr_hashtbl, + LOCK_HASH_SIZE, release_lockowner_sop); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d locks", count); +} + +void nfsd_forget_openowners(u64 num) +{ + int count; + + nfs4_lock_state(); + count = nfsd_release_n_owners(num, open_ownerstr_hashtbl, + OPEN_OWNER_HASH_SIZE, release_openowner_sop); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d open owners", count); +} + +int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) +{ + int i, count = 0; + struct nfs4_file *fp; + struct nfs4_delegation *dp, *next; + + for (i = 0; i < FILE_HASH_SIZE; i++) { + list_for_each_entry(fp, &file_hashtbl[i], fi_hash) { + list_for_each_entry_safe(dp, next, &fp->fi_delegations, dl_perfile) { + deleg_func(dp); + if (++count == num) + return count; + } + } + } + return count; +} + +void nfsd_forget_delegations(u64 num) +{ + unsigned int count; + + nfs4_lock_state(); + count = nfsd_process_n_delegations(num, unhash_delegation); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d delegations", count); +} + +void nfsd_recall_delegations(u64 num) +{ + unsigned int count; + + nfs4_lock_state(); + spin_lock(&recall_lock); + count = nfsd_process_n_delegations(num, nfsd_break_one_deleg); + spin_unlock(&recall_lock); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Recalled %d delegations", count); +} + +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + /* initialization to perform at module load time: */ int diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c45a2ea4a09..b2e8093ebc2 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -18,6 +18,7 @@ #include "idmap.h" #include "nfsd.h" #include "cache.h" +#include "fault_inject.h" /* * We have a single directory with several nodes in it. 
@@ -1131,6 +1132,9 @@ static int __init init_nfsd(void) retval = nfs4_state_init(); /* nfs4 locking state */ if (retval) return retval; + retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ + if (retval) + goto out_free_slabs; nfsd_stat_init(); /* Statistics */ retval = nfsd_reply_cache_init(); if (retval) @@ -1161,6 +1165,8 @@ out_free_cache: nfsd_reply_cache_shutdown(); out_free_stat: nfsd_stat_shutdown(); + nfsd_fault_inject_cleanup(); +out_free_slabs: nfsd4_free_slabs(); return retval; } @@ -1175,6 +1181,7 @@ static void __exit exit_nfsd(void) nfsd_lockd_shutdown(); nfsd_idmap_shutdown(); nfsd4_free_slabs(); + nfsd_fault_inject_cleanup(); unregister_filesystem(&nfsd_fs_type); } -- cgit v1.2.3 From 72083396074035ffa5cf81b6bb3e55f1d615badf Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 15:24:59 -0400 Subject: NFSD: Call nfsd4_init_slabs() from init_nfsd() init_nfsd() was calling free_slabs() during cleanup code, but the call to init_slabs() was hidden in nfsd4_state_init(). This could be confusing to people unfamiliar with the code. Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 10 +++------- fs/nfsd/nfsctl.c | 3 ++- fs/nfsd/nfsd.h | 6 ++++-- 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a511eff0569..1d6812db509 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2301,7 +2301,7 @@ nfsd4_free_slabs(void) nfsd4_free_slab(&deleg_slab); } -static int +int nfsd4_init_slabs(void) { openowner_slab = kmem_cache_create("nfsd4_openowners", @@ -4546,14 +4546,11 @@ void nfsd_recall_delegations(u64 num) /* initialization to perform at module load time: */ -int +void nfs4_state_init(void) { - int i, status; + int i; - status = nfsd4_init_slabs(); - if (status) - return status; for (i = 0; i < CLIENT_HASH_SIZE; i++) { INIT_LIST_HEAD(&conf_id_hashtbl[i]); INIT_LIST_HEAD(&conf_str_hashtbl[i]); @@ -4577,7 +4574,6 @@ nfs4_state_init(void) INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); reclaim_str_hashtbl_size = 0; - return 0; } static void diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b2e8093ebc2..8daa935f329 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1129,9 +1129,10 @@ static int __init init_nfsd(void) int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = nfs4_state_init(); /* nfs4 locking state */ + retval = nfsd4_init_slabs(); if (retval) return retval; + nfs4_state_init(); retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ if (retval) goto out_free_slabs; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 58134a23fdf..4e2ee29f2cf 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -104,14 +104,16 @@ static inline int nfsd_v4client(struct svc_rqst *rq) */ #ifdef CONFIG_NFSD_V4 extern unsigned int max_delegations; -int nfs4_state_init(void); +void nfs4_state_init(void); +int nfsd4_init_slabs(void); void nfsd4_free_slabs(void); int nfs4_state_start(void); void nfs4_state_shutdown(void); void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); #else -static inline int nfs4_state_init(void) { return 0; } +static inline void nfs4_state_init(void) { } +static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } static inline int nfs4_state_start(void) { return 0; } static inline void nfs4_state_shutdown(void) { } -- cgit v1.2.3 From 
c7e8472cf8ae877b232eb506284a5b15cf7efb2c Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 14:18:28 -0400 Subject: NFSD: Remove unnecessary whitespace The close parenthesis was hard to find with it spaced so far over. Signed-off-by: Bryan Schumaker [bfields@redhat.com: get all these lines under 80 chars while we're here] Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 4e2ee29f2cf..1d1e8589b4c 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -340,15 +340,15 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) } /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ -#define NFSD_WRITEONLY_ATTRS_WORD1 \ -(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEONLY_ATTRS_WORD1 \ + (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) /* These are the only attrs allowed in CREATE/OPEN/SETATTR. */ -#define NFSD_WRITEABLE_ATTRS_WORD0 \ -(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) -#define NFSD_WRITEABLE_ATTRS_WORD1 \ -(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEABLE_ATTRS_WORD0 \ + (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) +#define NFSD_WRITEABLE_ATTRS_WORD1 \ + (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) #define NFSD_WRITEABLE_ATTRS_WORD2 0 #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ -- cgit v1.2.3 From 06f1f864d4ae5804e83785308d41f14a08e4b980 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 16:58:18 -0500 Subject: nfsd4: hash lockowners to simplify RELEASE_LOCKOWNER Hash lockowners on just the owner string rather than on (owner, inode). This makes the owner-string lookup needed for RELEASE_LOCKOWNER simpler (currently it's doing at a linear search through the entire hash table!). That may come at the expense of making (owner, inode) lookups more expensive if a client reuses the same lockowner across multiple files. We might add a separate lookup for that. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1d6812db509..eec990022ad 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3746,15 +3746,6 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1: NFS4_MAX_UINT64; } -static inline unsigned int -lock_ownerstr_hashval(struct inode *inode, u32 cl_id, - struct xdr_netobj *ownername) -{ - return (file_hashval(inode) + cl_id - + opaque_hashval(ownername->data, ownername->len)) - & LOCK_HASH_MASK; -} - static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; /* @@ -3824,7 +3815,7 @@ static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { - unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); + unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); struct nfs4_lockowner *lo; struct nfs4_stateowner *op; @@ -3847,7 +3838,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has * occurred. 
* - * strhashval = lock_ownerstr_hashval + * strhashval = open_ownerstr_hashval */ static struct nfs4_lockowner * @@ -3922,7 +3913,7 @@ __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct n struct nfs4_ol_stateid, st_perstateowner); return nfs_ok; } - strhashval = lock_ownerstr_hashval(fi->fi_inode, cl->cl_clientid.cl_id, + strhashval = open_ownerstr_hashval(cl->cl_clientid.cl_id, &lock->v.new.owner); lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) @@ -4286,7 +4277,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; struct list_head matches; - int i; + unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", @@ -4301,22 +4292,17 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, nfs4_lock_state(); status = nfserr_locks_held; - /* XXX: we're doing a linear search through all the lockowners. - * Yipes! For now we'll just hope clients aren't really using - * release_lockowner much, but eventually we have to fix these - * data structures. */ INIT_LIST_HEAD(&matches); - for (i = 0; i < LOCK_HASH_SIZE; i++) { - list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) { - if (!same_owner_str(sop, owner, clid)) - continue; - list_for_each_entry(stp, &sop->so_stateids, - st_perstateowner) { - lo = lockowner(sop); - if (check_for_locks(stp->st_file, lo)) - goto out; - list_add(&lo->lo_list, &matches); - } + + list_for_each_entry(sop, &lock_ownerstr_hashtbl[hashval], so_strhash) { + if (!same_owner_str(sop, owner, clid)) + continue; + list_for_each_entry(stp, &sop->so_stateids, + st_perstateowner) { + lo = lockowner(sop); + if (check_for_locks(stp->st_file, lo)) + goto out; + list_add(&lo->lo_list, &matches); } } /* Clients probably won't expect us to return with some (but not all) -- cgit v1.2.3 From 16bfdaafa2c66d8cc81422a97a105a849ca65b3e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 17:23:30 -0500 Subject: nfsd4: share open and lock owner hash tables Now that they're used in the same way, it's a little simpler to put open and lock owners in the same hash table, and I can't see a reason not to. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 71 ++++++++++++++++++++++++----------------------------- 1 file changed, 32 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index eec990022ad..dcbe7f37759 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -133,21 +133,21 @@ unsigned int max_delegations; * Open owner state (share locks) */ -/* hash tables for open owners */ -#define OPEN_OWNER_HASH_BITS 8 -#define OPEN_OWNER_HASH_SIZE (1 << OPEN_OWNER_HASH_BITS) -#define OPEN_OWNER_HASH_MASK (OPEN_OWNER_HASH_SIZE - 1) +/* hash tables for lock and open owners */ +#define OWNER_HASH_BITS 8 +#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) +#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) -static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) +static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) { unsigned int ret; ret = opaque_hashval(ownername->data, ownername->len); ret += clientid; - return ret & OPEN_OWNER_HASH_MASK; + return ret & OWNER_HASH_MASK; } -static struct list_head open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE]; +static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; /* hash table for nfs4_file */ #define FILE_HASH_BITS 8 @@ -2373,7 +2373,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { - list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]); + list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); list_add(&oo->oo_perclient, &clp->cl_openowners); } @@ -2436,7 +2436,9 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) struct nfs4_stateowner *so; struct nfs4_openowner *oo; - list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { + if (!so->so_is_open_owner) + continue; if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { oo = openowner(so); renew_client(oo->oo_owner.so_client); @@ -2580,7 +2582,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; - strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner); + strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); oo = find_openstateowner_str(strhashval, open); open->op_openowner = oo; if (!oo) { @@ -3718,13 +3720,7 @@ out: } -/* - * Lock owner state (byte-range locks) - */ #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) -#define LOCK_HASH_BITS 8 -#define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS) -#define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1) static inline u64 end_offset(u64 start, u64 len) @@ -3746,8 +3742,6 @@ last_byte_offset(u64 start, u64 len) return end > start ? 
end - 1: NFS4_MAX_UINT64; } -static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; - /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -3815,11 +3809,13 @@ static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { - unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); struct nfs4_lockowner *lo; struct nfs4_stateowner *op; - list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(op, &ownerstr_hashtbl[hashval], so_strhash) { + if (op->so_is_open_owner) + continue; lo = lockowner(op); if (same_lockowner_ino(lo, inode, clid, owner)) return lo; @@ -3829,7 +3825,7 @@ find_lockowner_str(struct inode *inode, clientid_t *clid, static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) { - list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); list_add(&lo->lo_perstateid, &open_stp->st_lockowners); } @@ -3838,7 +3834,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has * occurred. * - * strhashval = open_ownerstr_hashval + * strhashval = ownerstr_hashval */ static struct nfs4_lockowner * @@ -3913,7 +3909,7 @@ __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct n struct nfs4_ol_stateid, st_perstateowner); return nfs_ok; } - strhashval = open_ownerstr_hashval(cl->cl_clientid.cl_id, + strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, &lock->v.new.owner); lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) @@ -4277,7 +4273,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; struct list_head matches; - unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", @@ -4294,7 +4290,9 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, status = nfserr_locks_held; INIT_LIST_HEAD(&matches); - list_for_each_entry(sop, &lock_ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) { + if (sop->so_is_open_owner) + continue; if (!same_owner_str(sop, owner, clid)) continue; list_for_each_entry(stp, &sop->so_stateids, @@ -4444,16 +4442,16 @@ static void release_openowner_sop(struct nfs4_stateowner *sop) release_openowner(openowner(sop)); } -static int nfsd_release_n_owners(u64 num, - struct list_head hashtbl[], - unsigned int hashtbl_size, +static int nfsd_release_n_owners(u64 num, bool is_open_owner void (*release_sop)(struct nfs4_stateowner *)) { int i, count = 0; struct nfs4_stateowner *sop, *next; - for (i = 0; i < hashtbl_size; i++) { - list_for_each_entry_safe(sop, next, &hashtbl[i], so_strhash) { + for (i = 0; i < OWNER_HASH_SIZE; i++) { + list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) { + if (sop->so_is_open_owner != is_open_owner) + continue; release_sop(sop); if (++count == num) return count; @@ -4467,8 +4465,7 @@ void nfsd_forget_locks(u64 num) int count; nfs4_lock_state(); - count = nfsd_release_n_owners(num, 
lock_ownerstr_hashtbl, - LOCK_HASH_SIZE, release_lockowner_sop); + count = nfsd_release_n_owners(num, false, release_lockowner_sop); nfs4_unlock_state(); printk(KERN_INFO "NFSD: Forgot %d locks", count); @@ -4479,8 +4476,7 @@ void nfsd_forget_openowners(u64 num) int count; nfs4_lock_state(); - count = nfsd_release_n_owners(num, open_ownerstr_hashtbl, - OPEN_OWNER_HASH_SIZE, release_openowner_sop); + count = nfsd_release_n_owners(num, true, release_openowner_sop); nfs4_unlock_state(); printk(KERN_INFO "NFSD: Forgot %d open owners", count); @@ -4549,11 +4545,8 @@ nfs4_state_init(void) for (i = 0; i < FILE_HASH_SIZE; i++) { INIT_LIST_HEAD(&file_hashtbl[i]); } - for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) { - INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]); - } - for (i = 0; i < LOCK_HASH_SIZE; i++) { - INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); + for (i = 0; i < OWNER_HASH_SIZE; i++) { + INIT_LIST_HEAD(&ownerstr_hashtbl[i]); } memset(&onestateid, ~0, sizeof(stateid_t)); INIT_LIST_HEAD(&close_lru); -- cgit v1.2.3 From 63894ab9f63a688f6b0b8cdd01ac0a9f36d507b8 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Tue, 1 Nov 2011 10:06:19 +0800 Subject: ext3: call ext3_mark_recovery_complete() when recovery is really needed Call ext3_mark_recovery_complete() in ext3_fill_super() only if needs_recovery is non-zero. Besides that, print out "recovery complete" message after calling ext3_mark_recovery_complete(). Cc: Jan Kara Signed-off-by: Eryu Guan Signed-off-by: Jan Kara --- fs/ext3/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 922d289aeeb..767fa3a2bd1 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2060,9 +2060,10 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; ext3_orphan_cleanup(sb, es); EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; - if (needs_recovery) + if (needs_recovery) { + ext3_mark_recovery_complete(sb, es); ext3_msg(sb, KERN_INFO, "recovery complete"); - ext3_mark_recovery_complete(sb, es); + } ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode", test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": -- cgit v1.2.3 From 353de31b86850309b205a3d4cc1294052471b14d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 15 Nov 2011 19:25:03 -0500 Subject: nfsd4: fix CONFIG_NFSD_FAULT_INJECTION compile error Reported-by: Stephen Rothwell Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index dcbe7f37759..7e4edbde737 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4442,7 +4442,7 @@ static void release_openowner_sop(struct nfs4_stateowner *sop) release_openowner(openowner(sop)); } -static int nfsd_release_n_owners(u64 num, bool is_open_owner +static int nfsd_release_n_owners(u64 num, bool is_open_owner, void (*release_sop)(struct nfs4_stateowner *)) { int i, count = 0; -- cgit v1.2.3 From 009673b439cf74d70a486fca0177e274febd81a7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 17:40:10 -0500 Subject: nfsd4: add a separate (lockowner, inode) lookup Address the possible performance regression mentioned in "nfsd4: hash lockowners to simplify RELEASE_LOCKOWNER" by providing a separate (lockowner, inode) hash. 
Really, I doubt this matters much, but I think it's likely we'll change these data structures here and I'd rather that the need for (owner, inode) lookups be well-documented. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 29 +++++++++++++++++++++++------ fs/nfsd/state.h | 1 + 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7e4edbde737..a84978c13bb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -514,6 +514,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) list_del(&lo->lo_owner.so_strhash); list_del(&lo->lo_perstateid); + list_del(&lo->lo_owner_ino_hash); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); @@ -3722,6 +3723,10 @@ out: #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) +#define LOCKOWNER_INO_HASH_BITS 8 +#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) +#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) + static inline u64 end_offset(u64 start, u64 len) { @@ -3742,6 +3747,15 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1: NFS4_MAX_UINT64; } +static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername) +{ + return (file_hashval(inode) + cl_id + + opaque_hashval(ownername->data, ownername->len)) + & LOCKOWNER_INO_HASH_MASK; +} + +static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE]; + /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -3809,14 +3823,10 @@ static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { - unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); struct nfs4_lockowner *lo; - struct nfs4_stateowner *op; - list_for_each_entry(op, &ownerstr_hashtbl[hashval], so_strhash) { - if (op->so_is_open_owner) - continue; - lo = lockowner(op); + list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { if (same_lockowner_ino(lo, inode, clid, owner)) return lo; } @@ -3825,7 +3835,12 @@ find_lockowner_str(struct inode *inode, clientid_t *clid, static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) { + struct inode *inode = open_stp->st_file->fi_inode; + unsigned int inohash = lockowner_ino_hashval(inode, + clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); + list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]); list_add(&lo->lo_perstateid, &open_stp->st_lockowners); } @@ -4548,6 +4563,8 @@ nfs4_state_init(void) for (i = 0; i < OWNER_HASH_SIZE; i++) { INIT_LIST_HEAD(&ownerstr_hashtbl[i]); } + for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) + INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); memset(&onestateid, ~0, sizeof(stateid_t)); INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index a3cf38476a1..89c2cd84d79 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -366,6 +366,7 @@ struct nfs4_openowner { struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ + struct list_head lo_owner_ino_hash; /* hash by owner,file */ struct list_head lo_perstateid; /* 
for lockowners only */ struct list_head lo_list; /* for temporary uses */ }; -- cgit v1.2.3 From 9beb3bf5a92bb8fc6503f844bf0772df29f14a02 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 26 Oct 2011 15:24:55 -0500 Subject: dlm: convert rsb list to rb_tree Change the linked lists to rb_tree's in the rsb hash table to speed up searches. Slow rsb searches were having a large impact on gfs2 performance due to the large number of dlm locks gfs2 uses. Signed-off-by: Bob Peterson Signed-off-by: David Teigland --- fs/dlm/debug_fs.c | 28 +++++++++-------- fs/dlm/dlm_internal.h | 9 ++++-- fs/dlm/lock.c | 87 +++++++++++++++++++++++++++++++++++++++++---------- fs/dlm/lockspace.c | 23 ++++++-------- fs/dlm/recover.c | 21 ++++++++----- 5 files changed, 113 insertions(+), 55 deletions(-) (limited to 'fs') diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 59779237e2b..3dca2b39e83 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -393,6 +393,7 @@ static const struct seq_operations format3_seq_ops; static void *table_seq_start(struct seq_file *seq, loff_t *pos) { + struct rb_node *node; struct dlm_ls *ls = seq->private; struct rsbtbl_iter *ri; struct dlm_rsb *r; @@ -418,9 +419,10 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos) ri->format = 3; spin_lock(&ls->ls_rsbtbl[bucket].lock); - if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { - list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, - res_hashchain) { + if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { + for (node = rb_first(&ls->ls_rsbtbl[bucket].keep); node; + node = rb_next(node)) { + r = rb_entry(node, struct dlm_rsb, res_hashnode); if (!entry--) { dlm_hold_rsb(r); ri->rsb = r; @@ -449,9 +451,9 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos) } spin_lock(&ls->ls_rsbtbl[bucket].lock); - if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { - r = list_first_entry(&ls->ls_rsbtbl[bucket].list, - struct dlm_rsb, res_hashchain); + if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { + node = rb_first(&ls->ls_rsbtbl[bucket].keep); + r = rb_entry(node, struct dlm_rsb, res_hashnode); dlm_hold_rsb(r); ri->rsb = r; ri->bucket = bucket; @@ -467,7 +469,7 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) { struct dlm_ls *ls = seq->private; struct rsbtbl_iter *ri = iter_ptr; - struct list_head *next; + struct rb_node *next; struct dlm_rsb *r, *rp; loff_t n = *pos; unsigned bucket; @@ -480,10 +482,10 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) spin_lock(&ls->ls_rsbtbl[bucket].lock); rp = ri->rsb; - next = rp->res_hashchain.next; + next = rb_next(&rp->res_hashnode); - if (next != &ls->ls_rsbtbl[bucket].list) { - r = list_entry(next, struct dlm_rsb, res_hashchain); + if (next) { + r = rb_entry(next, struct dlm_rsb, res_hashnode); dlm_hold_rsb(r); ri->rsb = r; spin_unlock(&ls->ls_rsbtbl[bucket].lock); @@ -511,9 +513,9 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) } spin_lock(&ls->ls_rsbtbl[bucket].lock); - if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { - r = list_first_entry(&ls->ls_rsbtbl[bucket].list, - struct dlm_rsb, res_hashchain); + if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { + next = rb_first(&ls->ls_rsbtbl[bucket].keep); + r = rb_entry(next, struct dlm_rsb, res_hashnode); dlm_hold_rsb(r); ri->rsb = r; ri->bucket = bucket; diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index fe2860c0244..5685a9a5dba 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -103,8 +103,8 @@ struct 
dlm_dirtable { }; struct dlm_rsbtable { - struct list_head list; - struct list_head toss; + struct rb_root keep; + struct rb_root toss; spinlock_t lock; }; @@ -285,7 +285,10 @@ struct dlm_rsb { unsigned long res_toss_time; uint32_t res_first_lkid; struct list_head res_lookup; /* lkbs waiting on first */ - struct list_head res_hashchain; /* rsbtbl */ + union { + struct list_head res_hashchain; + struct rb_node res_hashnode; /* rsbtbl */ + }; struct list_head res_grantqueue; struct list_head res_convertqueue; struct list_head res_waitqueue; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 83b5e32514e..d47183043c5 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -56,6 +56,7 @@ L: receive_xxxx_reply() <- R: send_xxxx_reply() */ #include +#include #include #include "dlm_internal.h" #include @@ -380,6 +381,8 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); list_del(&r->res_hashchain); + /* Convert the empty list_head to a NULL rb_node for tree usage: */ + memset(&r->res_hashnode, 0, sizeof(struct rb_node)); ls->ls_new_rsb_count--; spin_unlock(&ls->ls_new_rsb_spin); @@ -388,7 +391,6 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, memcpy(r->res_name, name, len); mutex_init(&r->res_mutex); - INIT_LIST_HEAD(&r->res_hashchain); INIT_LIST_HEAD(&r->res_lookup); INIT_LIST_HEAD(&r->res_grantqueue); INIT_LIST_HEAD(&r->res_convertqueue); @@ -400,14 +402,31 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, return 0; } -static int search_rsb_list(struct list_head *head, char *name, int len, +static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen) +{ + char maxname[DLM_RESNAME_MAXLEN]; + + memset(maxname, 0, DLM_RESNAME_MAXLEN); + memcpy(maxname, name, nlen); + return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN); +} + +static int search_rsb_tree(struct rb_root *tree, char *name, int len, unsigned int flags, struct dlm_rsb **r_ret) { + struct rb_node *node = tree->rb_node; struct dlm_rsb *r; int error = 0; - - list_for_each_entry(r, head, res_hashchain) { - if (len == r->res_length && !memcmp(name, r->res_name, len)) + int rc; + + while (node) { + r = rb_entry(node, struct dlm_rsb, res_hashnode); + rc = rsb_cmp(r, name, len); + if (rc < 0) + node = node->rb_left; + else if (rc > 0) + node = node->rb_right; + else goto found; } *r_ret = NULL; @@ -420,22 +439,54 @@ static int search_rsb_list(struct list_head *head, char *name, int len, return error; } +static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) +{ + struct rb_node **newn = &tree->rb_node; + struct rb_node *parent = NULL; + int rc; + + while (*newn) { + struct dlm_rsb *cur = rb_entry(*newn, struct dlm_rsb, + res_hashnode); + + parent = *newn; + rc = rsb_cmp(cur, rsb->res_name, rsb->res_length); + if (rc < 0) + newn = &parent->rb_left; + else if (rc > 0) + newn = &parent->rb_right; + else { + log_print("rsb_insert match"); + dlm_dump_rsb(rsb); + dlm_dump_rsb(cur); + return -EEXIST; + } + } + + rb_link_node(&rsb->res_hashnode, parent, newn); + rb_insert_color(&rsb->res_hashnode, tree); + return 0; +} + static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, unsigned int flags, struct dlm_rsb **r_ret) { struct dlm_rsb *r; int error; - error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r); + error = search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r); if (!error) { kref_get(&r->res_ref); goto out; } - error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, 
flags, &r); + error = search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); if (error) goto out; - list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list); + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); + error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); + if (error) + return error; if (dlm_no_directory(ls)) goto out; @@ -527,8 +578,7 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen, nodeid = 0; r->res_nodeid = nodeid; } - list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); - error = 0; + error = rsb_insert(r, &ls->ls_rsbtbl[bucket].keep); out_unlock: spin_unlock(&ls->ls_rsbtbl[bucket].lock); out: @@ -556,7 +606,8 @@ static void toss_rsb(struct kref *kref) DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r);); kref_init(&r->res_ref); - list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep); + rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss); r->res_toss_time = jiffies; if (r->res_lvbptr) { dlm_free_lvb(r->res_lvbptr); @@ -1082,19 +1133,19 @@ static void dir_remove(struct dlm_rsb *r) r->res_name, r->res_length); } -/* FIXME: shouldn't this be able to exit as soon as one non-due rsb is - found since they are in order of newest to oldest? */ +/* FIXME: make this more efficient */ static int shrink_bucket(struct dlm_ls *ls, int b) { + struct rb_node *n; struct dlm_rsb *r; int count = 0, found; for (;;) { found = 0; spin_lock(&ls->ls_rsbtbl[b].lock); - list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, - res_hashchain) { + for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = rb_next(n)) { + r = rb_entry(n, struct dlm_rsb, res_hashnode); if (!time_after_eq(jiffies, r->res_toss_time + dlm_config.ci_toss_secs * HZ)) continue; @@ -1108,7 +1159,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b) } if (kref_put(&r->res_ref, kill_rsb)) { - list_del(&r->res_hashchain); + rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); spin_unlock(&ls->ls_rsbtbl[b].lock); if (is_master(r)) @@ -4441,10 +4492,12 @@ int dlm_purge_locks(struct dlm_ls *ls) static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket) { + struct rb_node *n; struct dlm_rsb *r, *r_ret = NULL; spin_lock(&ls->ls_rsbtbl[bucket].lock); - list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) { + for (n = rb_first(&ls->ls_rsbtbl[bucket].keep); n; n = rb_next(n)) { + r = rb_entry(n, struct dlm_rsb, res_hashnode); if (!rsb_flag(r, RSB_LOCKS_PURGED)) continue; hold_rsb(r); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index a1d8f1af144..1d16a23b0a0 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -457,8 +457,8 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, if (!ls->ls_rsbtbl) goto out_lsfree; for (i = 0; i < size; i++) { - INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list); - INIT_LIST_HEAD(&ls->ls_rsbtbl[i].toss); + ls->ls_rsbtbl[i].keep.rb_node = NULL; + ls->ls_rsbtbl[i].toss.rb_node = NULL; spin_lock_init(&ls->ls_rsbtbl[i].lock); } @@ -685,7 +685,7 @@ static int lockspace_busy(struct dlm_ls *ls, int force) static int release_lockspace(struct dlm_ls *ls, int force) { struct dlm_rsb *rsb; - struct list_head *head; + struct rb_node *n; int i, busy, rv; busy = lockspace_busy(ls, force); @@ -746,20 +746,15 @@ static int release_lockspace(struct dlm_ls *ls, int force) */ for (i = 0; i < ls->ls_rsbtbl_size; i++) { - head = &ls->ls_rsbtbl[i].list; - while (!list_empty(head)) { - rsb = list_entry(head->next, struct dlm_rsb, - res_hashchain); - - list_del(&rsb->res_hashchain); + 
while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) { + rsb = rb_entry(n, struct dlm_rsb, res_hashnode); + rb_erase(n, &ls->ls_rsbtbl[i].keep); dlm_free_rsb(rsb); } - head = &ls->ls_rsbtbl[i].toss; - while (!list_empty(head)) { - rsb = list_entry(head->next, struct dlm_rsb, - res_hashchain); - list_del(&rsb->res_hashchain); + while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) { + rsb = rb_entry(n, struct dlm_rsb, res_hashnode); + rb_erase(n, &ls->ls_rsbtbl[i].toss); dlm_free_rsb(rsb); } } diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 14638235f7b..50467cefdbd 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -715,6 +715,7 @@ void dlm_recover_rsbs(struct dlm_ls *ls) int dlm_create_root_list(struct dlm_ls *ls) { + struct rb_node *n; struct dlm_rsb *r; int i, error = 0; @@ -727,7 +728,8 @@ int dlm_create_root_list(struct dlm_ls *ls) for (i = 0; i < ls->ls_rsbtbl_size; i++) { spin_lock(&ls->ls_rsbtbl[i].lock); - list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) { + for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) { + r = rb_entry(n, struct dlm_rsb, res_hashnode); list_add(&r->res_root_list, &ls->ls_root_list); dlm_hold_rsb(r); } @@ -741,7 +743,8 @@ int dlm_create_root_list(struct dlm_ls *ls) continue; } - list_for_each_entry(r, &ls->ls_rsbtbl[i].toss, res_hashchain) { + for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = rb_next(n)) { + r = rb_entry(n, struct dlm_rsb, res_hashnode); list_add(&r->res_root_list, &ls->ls_root_list); dlm_hold_rsb(r); } @@ -771,16 +774,18 @@ void dlm_release_root_list(struct dlm_ls *ls) void dlm_clear_toss_list(struct dlm_ls *ls) { - struct dlm_rsb *r, *safe; + struct rb_node *n, *next; + struct dlm_rsb *rsb; int i; for (i = 0; i < ls->ls_rsbtbl_size; i++) { spin_lock(&ls->ls_rsbtbl[i].lock); - list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, - res_hashchain) { - if (dlm_no_directory(ls) || !is_master(r)) { - list_del(&r->res_hashchain); - dlm_free_rsb(r); + for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) { + next = rb_next(n);; + rsb = rb_entry(n, struct dlm_rsb, res_hashnode); + if (dlm_no_directory(ls) || !is_master(rsb)) { + rb_erase(n, &ls->ls_rsbtbl[i].toss); + dlm_free_rsb(rsb); } } spin_unlock(&ls->ls_rsbtbl[i].lock); -- cgit v1.2.3 From 0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 17 Nov 2011 16:42:19 -0500 Subject: NLS: improve UTF8 -> UTF16 string conversion routine The utf8s_to_utf16s conversion routine needs to be improved. Unlike its utf16s_to_utf8s sibling, it doesn't accept arguments specifying the maximum length of the output buffer or the endianness of its 16-bit output. This patch (as1501) adds the two missing arguments, and adjusts the only two places in the kernel where the function is called. A follow-on patch will add a third caller that does utilize the new capabilities. The two conversion routines are still annoyingly inconsistent in the way they handle invalid byte combinations. But that's a subject for a different patch. 
Signed-off-by: Alan Stern CC: Clemens Ladisch Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_kvp.c | 10 ++++++---- fs/fat/namei_vfat.c | 3 ++- fs/nls/nls_base.c | 43 +++++++++++++++++++++++++++++++++---------- include/linux/nls.h | 5 +++-- 4 files changed, 44 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 89f52440fcf..0e8343f585b 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -212,11 +212,13 @@ kvp_respond_to_host(char *key, char *value, int error) * The windows host expects the key/value pair to be encoded * in utf16. */ - keylen = utf8s_to_utf16s(key_name, strlen(key_name), - (wchar_t *)kvp_data->data.key); + keylen = utf8s_to_utf16s(key_name, strlen(key_name), UTF16_HOST_ENDIAN, + (wchar_t *) kvp_data->data.key, + HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2); kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */ - valuelen = utf8s_to_utf16s(value, strlen(value), - (wchar_t *)kvp_data->data.value); + valuelen = utf8s_to_utf16s(value, strlen(value), UTF16_HOST_ENDIAN, + (wchar_t *) kvp_data->data.value, + HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2); kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */ kvp_data->data.value_type = REG_SZ; /* all our values are strings */ diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index a87a65663c2..c25cf151b84 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, int charlen; if (utf8) { - *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname); + *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN, + (wchar_t *) outname, FAT_LFN_LEN + 2); if (*outlen < 0) return *outlen; else if (*outlen > FAT_LFN_LEN) diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 44a88a9fa2c..0eb059ec6f2 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) } EXPORT_SYMBOL(utf32_to_utf8); -int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) +static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) +{ + switch (endian) { + default: + *s = (wchar_t) c; + break; + case UTF16_LITTLE_ENDIAN: + *s = __cpu_to_le16(c); + break; + case UTF16_BIG_ENDIAN: + *s = __cpu_to_be16(c); + break; + } +} + +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, + wchar_t *pwcs, int maxlen) { u16 *op; int size; unicode_t u; op = pwcs; - while (*s && len > 0) { + while (len > 0 && maxlen > 0 && *s) { if (*s & 0x80) { size = utf8_to_utf32(s, len, &u); if (size < 0) return -EINVAL; + s += size; + len -= size; if (u >= PLANE_SIZE) { + if (maxlen < 2) + break; u -= PLANE_SIZE; - *op++ = (wchar_t) (SURROGATE_PAIR | - ((u >> 10) & SURROGATE_BITS)); - *op++ = (wchar_t) (SURROGATE_PAIR | + put_utf16(op++, SURROGATE_PAIR | + ((u >> 10) & SURROGATE_BITS), + endian); + put_utf16(op++, SURROGATE_PAIR | SURROGATE_LOW | - (u & SURROGATE_BITS)); + (u & SURROGATE_BITS), + endian); + maxlen -= 2; } else { - *op++ = (wchar_t) u; + put_utf16(op++, u, endian); + maxlen--; } - s += size; - len -= size; } else { - *op++ = *s++; + put_utf16(op++, *s++, endian); len--; + maxlen--; } } return op - pwcs; diff --git a/include/linux/nls.h b/include/linux/nls.h index d47beef08df..5dc635f8d79 100644 --- a/include/linux/nls.h +++ b/include/linux/nls.h @@ -43,7 +43,7 @@ enum utf16_endian { UTF16_BIG_ENDIAN }; -/* nls.c */ +/* nls_base.c */ extern int register_nls(struct nls_table *); extern int unregister_nls(struct 
nls_table *); extern struct nls_table *load_nls(char *); @@ -52,7 +52,8 @@ extern struct nls_table *load_nls_default(void); extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu); extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen); -extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs); +extern int utf8s_to_utf16s(const u8 *s, int len, + enum utf16_endian endian, wchar_t *pwcs, int maxlen); extern int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, u8 *s, int maxlen); -- cgit v1.2.3 From 8c111b3f56332a216b18cd57950bdf04ac8f2a98 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Sat, 19 Nov 2011 17:34:29 +0800 Subject: jbd: clear revoked flag on buffers before a new transaction started Currently, we clear revoked flag only when a block is reused. However, this can tigger a false journal error. Consider a situation when a block is used as a meta block and is deleted(revoked) in ordered mode, then the block is allocated as a data block to a file. At this moment, user changes the file's journal mode from ordered to journaled and truncates the file. The block will be considered re-revoked by journal because it has revoked flag still pending from the last transaction and an assertion triggers. We fix the problem by keeping the revoked status more uptodate - we clear revoked flag when switching revoke tables to reflect there is no revoked buffers in current transaction any more. Signed-off-by: Yongqiang Yang Signed-off-by: Jan Kara --- fs/jbd/commit.c | 6 ++++++ fs/jbd/revoke.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/jbd.h | 1 + 3 files changed, 41 insertions(+) (limited to 'fs') diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 8799207df05..f2b9a571f4c 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -391,6 +391,12 @@ void journal_commit_transaction(journal_t *journal) jbd_debug (3, "JBD: commit phase 1\n"); + /* + * Clear revoked flag to reflect there is no revoked buffers + * in the next transaction which is going to be started. + */ + journal_clear_buffer_revoked_flags(journal); + /* * Switch to a new revoke table. */ diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 305a9076315..25c713e7071 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -47,6 +47,10 @@ * overwriting the new data. We don't even need to clear the revoke * bit here. * + * We cache revoke status of a buffer in the current transaction in b_states + * bits. As the name says, revokevalid flag indicates that the cached revoke + * status of a buffer is valid and we can rely on the cached status. + * * Revoke information on buffers is a tri-state value: * * RevokeValid clear: no cached revoke status, need to look it up @@ -479,6 +483,36 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) return did_revoke; } +/* + * journal_clear_revoked_flags clears revoked flag of buffers in + * revoke table to reflect there is no revoked buffer in the next + * transaction which is going to be started. 
+ */ +void journal_clear_buffer_revoked_flags(journal_t *journal) +{ + struct jbd_revoke_table_s *revoke = journal->j_revoke; + int i = 0; + + for (i = 0; i < revoke->hash_size; i++) { + struct list_head *hash_list; + struct list_head *list_entry; + hash_list = &revoke->hash_table[i]; + + list_for_each(list_entry, hash_list) { + struct jbd_revoke_record_s *record; + struct buffer_head *bh; + record = (struct jbd_revoke_record_s *)list_entry; + bh = __find_get_block(journal->j_fs_dev, + record->blocknr, + journal->j_blocksize); + if (bh) { + clear_buffer_revoked(bh); + __brelse(bh); + } + } + } +} + /* journal_switch_revoke table select j_revoke for next transaction * we do not want to suspend any processing until all revokes are * written -bzzz diff --git a/include/linux/jbd.h b/include/linux/jbd.h index c7acdde3243..492cc12244f 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -913,6 +913,7 @@ extern int journal_set_revoke(journal_t *, unsigned int, tid_t); extern int journal_test_revoke(journal_t *, unsigned int, tid_t); extern void journal_clear_revoke(journal_t *); extern void journal_switch_revoke_table(journal_t *journal); +extern void journal_clear_buffer_revoked_flags(journal_t *journal); /* * The log thread user interface: -- cgit v1.2.3 From eaecf43a6970c8d0ef54a31427c82a99e4863fe8 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Fri, 18 Nov 2011 00:00:52 +0100 Subject: UBIFS: Use kmemdup rather than duplicating its implementation The semantic patch that makes this change is available in scripts/coccinelle/api/memdup.cocci. Signed-off-by: Thomas Meyer Signed-off-by: Artem Bityutskiy --- fs/ubifs/lpt.c | 6 ++---- fs/ubifs/tnc.c | 3 +-- fs/ubifs/xattr.c | 6 ++---- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 6189c74d97f..66d59d0a140 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1986,12 +1986,11 @@ again: if (path[h].in_tree) continue; - nnode = kmalloc(sz, GFP_NOFS); + nnode = kmemdup(&path[h].nnode, sz, GFP_NOFS); if (!nnode) { err = -ENOMEM; goto out; } - memcpy(nnode, &path[h].nnode, sz); parent = nnode->parent; parent->nbranch[nnode->iip].nnode = nnode; path[h].ptr.nnode = nnode; @@ -2004,12 +2003,11 @@ again: const size_t sz = sizeof(struct ubifs_pnode); struct ubifs_nnode *parent; - pnode = kmalloc(sz, GFP_NOFS); + pnode = kmemdup(&path[h].pnode, sz, GFP_NOFS); if (!pnode) { err = -ENOMEM; goto out; } - memcpy(pnode, &path[h].pnode, sz); parent = pnode->parent; parent->nbranch[pnode->iip].pnode = pnode; path[h].ptr.pnode = pnode; diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 06673864768..e14ee53159d 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -344,12 +344,11 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, return err; } - lnc_node = kmalloc(zbr->len, GFP_NOFS); + lnc_node = kmemdup(node, zbr->len, GFP_NOFS); if (!lnc_node) /* We don't have to have the cache, so no error */ return 0; - memcpy(lnc_node, node, zbr->len); zbr->leaf = lnc_node; return 0; } diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index bf18f7a0454..85b27226875 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -138,12 +138,11 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, ui = ubifs_inode(inode); ui->xattr = 1; ui->flags |= UBIFS_XATTR_FL; - ui->data = kmalloc(size, GFP_NOFS); + ui->data = kmemdup(value, size, GFP_NOFS); if (!ui->data) { err = -ENOMEM; goto out_free; } - memcpy(ui->data, value, size); inode->i_size = ui->ui_size = size; ui->data_len = 
size; @@ -204,12 +203,11 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, return err; kfree(ui->data); - ui->data = kmalloc(size, GFP_NOFS); + ui->data = kmemdup(value, size, GFP_NOFS); if (!ui->data) { err = -ENOMEM; goto out_free; } - memcpy(ui->data, value, size); inode->i_size = ui->ui_size = size; ui->data_len = size; -- cgit v1.2.3 From bcdd0c1600903e9222abfcde28947406020ccb5d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 22 Nov 2011 11:00:20 +0300 Subject: ext3: NULL dereference in ext3_evict_inode() This is an fsfuzzer bug. ->s_journal is set at the end of ext3_load_journal() but we try to use it in the error handling from ext3_get_journal() while it's still NULL. [ 337.039041] BUG: unable to handle kernel NULL pointer dereference at 0000000000000024 [ 337.040380] IP: [] _raw_spin_lock+0x9/0x30 [ 337.041687] PGD 0 [ 337.043118] Oops: 0002 [#1] SMP [ 337.044483] CPU 3 [ 337.044495] Modules linked in: ecb md4 cifs fuse kvm_intel kvm brcmsmac brcmutil crc8 cordic r8169 [last unloaded: scsi_wait_scan] [ 337.047633] [ 337.049259] Pid: 8308, comm: mount Not tainted 3.2.0-rc2-next-20111121+ #24 SAMSUNG ELECTRONICS CO., LTD. RV411/RV511/E3511/S3511 /RV411/RV511/E3511/S3511 [ 337.051064] RIP: 0010:[] [] _raw_spin_lock+0x9/0x30 [ 337.052879] RSP: 0018:ffff8800b1d11ae8 EFLAGS: 00010282 [ 337.054668] RAX: 0000000000000100 RBX: 0000000000000000 RCX: ffff8800b77c2000 [ 337.056400] RDX: ffff8800a97b5c00 RSI: 0000000000000000 RDI: 0000000000000024 [ 337.058099] RBP: ffff8800b1d11ae8 R08: 6000000000000000 R09: e018000000000000 [ 337.059841] R10: ff67366cc2607c03 R11: 00000000110688e6 R12: 0000000000000000 [ 337.061607] R13: 0000000000000000 R14: 0000000000000000 R15: ffff8800a78f06e8 [ 337.063385] FS: 00007f9d95652800(0000) GS:ffff8800b7180000(0000) knlGS:0000000000000000 [ 337.065110] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 337.066801] CR2: 0000000000000024 CR3: 00000000aef2c000 CR4: 00000000000006e0 [ 337.068581] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 337.070321] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 337.072105] Process mount (pid: 8308, threadinfo ffff8800b1d10000, task ffff8800b1d02be0) [ 337.073800] Stack: [ 337.075487] ffff8800b1d11b08 ffffffff811f48cf ffff88007ac9b158 0000000000000000 [ 337.077255] ffff8800b1d11b38 ffffffff8119405d ffff88007ac9b158 ffff88007ac9b250 [ 337.078851] ffffffff8181bda0 ffffffff8181bda0 ffff8800b1d11b68 ffffffff81131e31 [ 337.080284] Call Trace: [ 337.081706] [] log_start_commit+0x1f/0x40 [ 337.083107] [] ext3_evict_inode+0x1fd/0x2a0 [ 337.084490] [] evict+0xa1/0x1a0 [ 337.085857] [] iput+0x101/0x210 [ 337.087220] [] iget_failed+0x21/0x30 [ 337.088581] [] ext3_iget+0x15c/0x450 [ 337.089936] [] ? ext3_rsv_window_add+0x81/0x100 [ 337.091284] [] ext3_get_journal+0x15/0xde [ 337.092641] [] ext3_fill_super+0xf2b/0x1c30 [ 337.093991] [] ? register_shrinker+0x4d/0x60 [ 337.095332] [] mount_bdev+0x1a2/0x1e0 [ 337.096680] [] ? ext3_setup_super+0x210/0x210 [ 337.098026] [] ext3_mount+0x10/0x20 [ 337.099362] [] mount_fs+0x3e/0x1b0 [ 337.100759] [] ? __alloc_percpu+0xb/0x10 [ 337.102330] [] vfs_kern_mount+0x65/0xc0 [ 337.103889] [] do_kern_mount+0x4f/0x100 [ 337.105442] [] do_mount+0x19c/0x890 [ 337.106989] [] ? memdup_user+0x46/0x90 [ 337.108572] [] ? 
strndup_user+0x53/0x70 [ 337.110114] [] sys_mount+0x8b/0xe0 [ 337.111617] [] system_call_fastpath+0x16/0x1b [ 337.113133] Code: 38 c2 74 0f 66 0f 1f 44 00 00 f3 90 0f b6 03 38 c2 75 f7 48 83 c4 08 5b 5d c3 0f 1f 84 00 00 00 00 00 55 b8 00 01 00 00 48 89 e5 66 0f c1 07 0f b6 d4 38 c2 74 0c 0f 1f 00 f3 90 0f b6 07 38 [ 337.116588] RIP [] _raw_spin_lock+0x9/0x30 [ 337.118260] RSP [ 337.119998] CR2: 0000000000000024 [ 337.188701] ---[ end trace c36d790becac1615 ]--- Signed-off-by: Dan Carpenter Signed-off-by: Jan Kara --- fs/ext3/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 85fe655fe3e..9da9125ba3e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -223,8 +223,12 @@ void ext3_evict_inode (struct inode *inode) * * Note that directories do not have this problem because they don't * use page cache. + * + * The s_journal check handles the case when ext3_get_journal() fails + * and puts the journal inode. */ if (inode->i_nlink && ext3_should_journal_data(inode) && + EXT3_SB(inode->i_sb)->s_journal && (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { tid_t commit_tid = atomic_read(&ei->i_datasync_tid); journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; -- cgit v1.2.3 From 67114fe6103183190fb0a24f58e5695a80beb2ec Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 17 Nov 2011 23:43:40 +0100 Subject: nfsd4: Use kmemdup rather than duplicating its implementation The semantic patch that makes this change is available in scripts/coccinelle/api/memdup.cocci. Signed-off-by: Thomas Meyer Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 +-- fs/nfsd/nfs4xdr.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a84978c13bb..6ab677913f9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -986,12 +986,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); if (clp == NULL) return NULL; - clp->cl_name.data = kmalloc(name.len, GFP_KERNEL); + clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); if (clp->cl_name.data == NULL) { kfree(clp); return NULL; } - memcpy(clp->cl_name.data, name.data, name.len); clp->cl_name.len = name.len; return clp; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b6fa792d6b8..0ec5a1b9700 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -215,10 +215,9 @@ defer_free(struct nfsd4_compoundargs *argp, static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { if (p == argp->tmp) { - p = kmalloc(nbytes, GFP_KERNEL); + p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); if (!p) return NULL; - memcpy(p, argp->tmp, nbytes); } else { BUG_ON(p != argp->tmpp); argp->tmpp = NULL; -- cgit v1.2.3 From 045ddc8991698a8e9c5668c6190faa8b5d516dc0 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 21 Nov 2011 10:15:13 -0500 Subject: NLS: rename "maxlen" to "maxout" in UTF conversion routines As requested by NamJae Jeon, this patch (as1503) changes the name of the "maxlen" parameters to "maxout" in the various UTF conversion routines. This should make the role of that parameter clearer. The patch also renames the "len" parameters to "inlen", for the same reason. 
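For reference, a minimal caller-side sketch of the bounded, endian-aware conversion API discussed in these two NLS patches (the wrapper name and buffer handling are illustrative assumptions, not code from any caller in this series):

/* Hypothetical helper: convert a NUL-terminated UTF-8 name into a
 * little-endian UTF-16 buffer of at most out_words 16-bit units.
 * Returns the number of units written, or -EINVAL on malformed UTF-8. */
#include <linux/nls.h>
#include <linux/string.h>

static int encode_name_utf16le(const char *name, wchar_t *out, int out_words)
{
        return utf8s_to_utf16s((const u8 *)name, strlen(name),
                               UTF16_LITTLE_ENDIAN, out, out_words);
}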
Signed-off-by: Alan Stern Reviewed-by: NamJae Jeon Signed-off-by: Greg Kroah-Hartman --- fs/nls/nls_base.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 0eb059ec6f2..fea6bd5831d 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -52,7 +52,7 @@ static const struct utf8_table utf8_table[] = #define SURROGATE_LOW 0x00000400 #define SURROGATE_BITS 0x000003ff -int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) +int utf8_to_utf32(const u8 *s, int inlen, unicode_t *pu) { unsigned long l; int c0, c, nc; @@ -71,7 +71,7 @@ int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) *pu = (unicode_t) l; return nc; } - if (len <= nc) + if (inlen <= nc) return -1; s++; c = (*s ^ 0x80) & 0xFF; @@ -83,7 +83,7 @@ int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) } EXPORT_SYMBOL(utf8_to_utf32); -int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) +int utf32_to_utf8(unicode_t u, u8 *s, int maxout) { unsigned long l; int c, nc; @@ -97,7 +97,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) return -1; nc = 0; - for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) { + for (t = utf8_table; t->cmask && maxout; t++, maxout--) { nc++; if (l <= t->lmask) { c = t->shift; @@ -129,24 +129,24 @@ static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) } } -int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, - wchar_t *pwcs, int maxlen) +int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian, + wchar_t *pwcs, int maxout) { u16 *op; int size; unicode_t u; op = pwcs; - while (len > 0 && maxlen > 0 && *s) { + while (inlen > 0 && maxout > 0 && *s) { if (*s & 0x80) { - size = utf8_to_utf32(s, len, &u); + size = utf8_to_utf32(s, inlen, &u); if (size < 0) return -EINVAL; s += size; - len -= size; + inlen -= size; if (u >= PLANE_SIZE) { - if (maxlen < 2) + if (maxout < 2) break; u -= PLANE_SIZE; put_utf16(op++, SURROGATE_PAIR | @@ -156,15 +156,15 @@ int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, SURROGATE_LOW | (u & SURROGATE_BITS), endian); - maxlen -= 2; + maxout -= 2; } else { put_utf16(op++, u, endian); - maxlen--; + maxout--; } } else { put_utf16(op++, *s++, endian); - len--; - maxlen--; + inlen--; + maxout--; } } return op - pwcs; @@ -183,27 +183,27 @@ static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian) } } -int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, - u8 *s, int maxlen) +int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian, + u8 *s, int maxout) { u8 *op; int size; unsigned long u, v; op = s; - while (len > 0 && maxlen > 0) { + while (inlen > 0 && maxout > 0) { u = get_utf16(*pwcs, endian); if (!u) break; pwcs++; - len--; + inlen--; if (u > 0x7f) { if ((u & SURROGATE_MASK) == SURROGATE_PAIR) { if (u & SURROGATE_LOW) { /* Ignore character and move on */ continue; } - if (len <= 0) + if (inlen <= 0) break; v = get_utf16(*pwcs, endian); if ((v & SURROGATE_MASK) != SURROGATE_PAIR || @@ -214,18 +214,18 @@ int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10) + (v & SURROGATE_BITS); pwcs++; - len--; + inlen--; } - size = utf32_to_utf8(u, op, maxlen); + size = utf32_to_utf8(u, op, maxout); if (size == -1) { /* Ignore character and move on */ } else { op += size; - maxlen -= size; + maxout -= size; } } else { *op++ = (u8) u; - maxlen--; + maxout--; } } return op - s; 
-- cgit v1.2.3 From b2ea70afade7080360ac55c4e64ff7a5fafdb67b Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 18 Nov 2011 12:14:49 +0200 Subject: nfsd: Fix oops when parsing a 0 length export expkey_parse() oopses when handling a 0 length export. This is easily triggerable from usermode by writing 0 bytes into '/proc/[proc id]/net/rpc/nfsd.fh/channel'. Below is the log: [ 1402.286893] BUG: unable to handle kernel paging request at ffff880077c49fff [ 1402.287632] IP: [] expkey_parse+0x28/0x2e1 [ 1402.287632] PGD 2206063 PUD 1fdfd067 PMD 1ffbc067 PTE 8000000077c49160 [ 1402.287632] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 1402.287632] CPU 1 [ 1402.287632] Pid: 20198, comm: trinity Not tainted 3.2.0-rc2-sasha-00058-gc65cd37 #6 [ 1402.287632] RIP: 0010:[] [] expkey_parse+0x28/0x2e1 [ 1402.287632] RSP: 0018:ffff880077f0fd68 EFLAGS: 00010292 [ 1402.287632] RAX: ffff880077c49fff RBX: 00000000ffffffea RCX: 0000000001043400 [ 1402.287632] RDX: 0000000000000000 RSI: ffff880077c4a000 RDI: ffffffff82283de0 [ 1402.287632] RBP: ffff880077f0fe18 R08: 0000000000000001 R09: ffff880000000000 [ 1402.287632] R10: 0000000000000000 R11: 0000000000000001 R12: ffff880077c4a000 [ 1402.287632] R13: ffffffff82283de0 R14: 0000000001043400 R15: ffffffff82283de0 [ 1402.287632] FS: 00007f25fec3f700(0000) GS:ffff88007d400000(0000) knlGS:0000000000000000 [ 1402.287632] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 1402.287632] CR2: ffff880077c49fff CR3: 0000000077e1d000 CR4: 00000000000406e0 [ 1402.287632] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1402.287632] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 1402.287632] Process trinity (pid: 20198, threadinfo ffff880077f0e000, task ffff880077db17b0) [ 1402.287632] Stack: [ 1402.287632] ffff880077db17b0 ffff880077c4a000 ffff880077f0fdb8 ffffffff810b411e [ 1402.287632] ffff880000000000 ffff880077db17b0 ffff880077c4a000 ffffffff82283de0 [ 1402.287632] 0000000001043400 ffffffff82283de0 ffff880077f0fde8 ffffffff81111f63 [ 1402.287632] Call Trace: [ 1402.287632] [] ? lock_release+0x1af/0x1bc [ 1402.287632] [] ? might_fault+0x97/0x9e [ 1402.287632] [] ? might_fault+0x4e/0x9e [ 1402.287632] [] cache_do_downcall+0x3e/0x4f [ 1402.287632] [] cache_write.clone.16+0xbb/0x130 [ 1402.287632] [] ? cache_write_pipefs+0x1a/0x1a [ 1402.287632] [] cache_write_procfs+0x19/0x1b [ 1402.287632] [] proc_reg_write+0x8e/0xad [ 1402.287632] [] vfs_write+0xaa/0xfd [ 1402.287632] [] ? fget_light+0x35/0x9e [ 1402.287632] [] sys_write+0x48/0x6f [ 1402.287632] [] system_call_fastpath+0x16/0x1b [ 1402.287632] Code: c0 c9 c3 55 48 63 d2 48 89 e5 48 8d 44 32 ff 41 57 41 56 41 55 41 54 53 bb ea ff ff ff 48 81 ec 88 00 00 00 48 89 b5 58 ff ff ff [ 1402.287632] 38 0a 0f 85 89 02 00 00 c6 00 00 48 8b 3d 44 4a e5 01 48 85 [ 1402.287632] RIP [] expkey_parse+0x28/0x2e1 [ 1402.287632] RSP [ 1402.287632] CR2: ffff880077c49fff [ 1402.287632] ---[ end trace 368ef53ff773a5e3 ]--- Cc: "J. Bruce Fields" Cc: Neil Brown Cc: linux-nfs@vger.kernel.org Cc: stable@kernel.org Signed-off-by: Sasha Levin Signed-off-by: J. 
Bruce Fields --- fs/nfsd/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 62f3b9074e8..5f312abf124 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) struct svc_expkey key; struct svc_expkey *ek = NULL; - if (mesg[mlen-1] != '\n') + if (mlen < 1 || mesg[mlen-1] != '\n') return -EINVAL; mesg[mlen-1] = 0; -- cgit v1.2.3 From 0cf99b91c669510b785b459c211772091a94efd5 Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Wed, 23 Nov 2011 10:48:40 +0800 Subject: nfsd41: allow non-reclaim open-by-fh's in 4.1 With NFSv4.0 it was safe to assume that open-by-filehandles were always reclaims. With NFSv4.1 there are non-reclaim open-by-filehandle operations, so we should ensure we're only insisting on reclaims in the OPEN_CLAIM_PREVIOUS case. Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index fa383361bc6..9415bc415ce 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -266,10 +266,6 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ { __be32 status; - /* Only reclaims from previously confirmed clients are valid */ - if ((status = nfs4_check_open_reclaim(&open->op_clientid))) - return status; - /* We don't know the target directory, and therefore can not * set the change info */ @@ -373,6 +369,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_OPEN_CLAIM_PREVIOUS: open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + status = nfs4_check_open_reclaim(&open->op_clientid); + if (status) + goto out; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: status = do_open_fhandle(rqstp, &cstate->current_fh, -- cgit v1.2.3 From f5c8593b94190aabdcf207a544f082c7816c4fe6 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 7 Dec 2011 12:57:56 +0300 Subject: NFSd: use network-namespace-aware cache registering routines v2: cache_register_net() and cache_unregister_net() GPL exports added This is a cleanup patch. Hopefully, the generic cache_register() and cache_unregister() will be removed some day. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. 
Bruce Fields --- fs/nfsd/export.c | 10 +++++----- fs/nfsd/nfs4idmap.c | 11 ++++++----- net/sunrpc/cache.c | 2 ++ 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5f312abf124..cf8a6bd062f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1226,12 +1226,12 @@ nfsd_export_init(void) int rv; dprintk("nfsd: initializing export module.\n"); - rv = cache_register(&svc_export_cache); + rv = cache_register_net(&svc_export_cache, &init_net); if (rv) return rv; - rv = cache_register(&svc_expkey_cache); + rv = cache_register_net(&svc_expkey_cache, &init_net); if (rv) - cache_unregister(&svc_export_cache); + cache_unregister_net(&svc_export_cache, &init_net); return rv; } @@ -1255,8 +1255,8 @@ nfsd_export_shutdown(void) dprintk("nfsd: shutting down export module.\n"); - cache_unregister(&svc_expkey_cache); - cache_unregister(&svc_export_cache); + cache_unregister_net(&svc_expkey_cache, &init_net); + cache_unregister_net(&svc_export_cache, &init_net); svcauth_unix_purge(); dprintk("nfsd: export shutdown complete.\n"); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 55780a22fdb..94096273cd6 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "idmap.h" #include "nfsd.h" @@ -466,20 +467,20 @@ nfsd_idmap_init(void) { int rv; - rv = cache_register(&idtoname_cache); + rv = cache_register_net(&idtoname_cache, &init_net); if (rv) return rv; - rv = cache_register(&nametoid_cache); + rv = cache_register_net(&nametoid_cache, &init_net); if (rv) - cache_unregister(&idtoname_cache); + cache_unregister_net(&idtoname_cache, &init_net); return rv; } void nfsd_idmap_shutdown(void) { - cache_unregister(&idtoname_cache); - cache_unregister(&nametoid_cache); + cache_unregister_net(&idtoname_cache, &init_net); + cache_unregister_net(&nametoid_cache, &init_net); } static int diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 72ad836e4fe..b8daa577734 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1641,6 +1641,7 @@ int cache_register_net(struct cache_detail *cd, struct net *net) sunrpc_destroy_cache_detail(cd); return ret; } +EXPORT_SYMBOL_GPL(cache_register_net); int cache_register(struct cache_detail *cd) { @@ -1653,6 +1654,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) remove_cache_proc_entries(cd, net); sunrpc_destroy_cache_detail(cd); } +EXPORT_SYMBOL_GPL(cache_unregister_net); void cache_unregister(struct cache_detail *cd) { -- cgit v1.2.3 From f32f3c2d3f09a586349ca9180885dc8741290fd9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 12 Dec 2011 15:00:35 -0500 Subject: nfsd4: initialize special stateid's at compile time Stateid's with "other" ("opaque") field all zeros or all ones are reserved. We define all_ones separately on the off chance there will be more such some day, though currently all the other special stateid's have zero other field. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6ab677913f9..213da7b7e7d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -49,12 +49,20 @@ time_t nfsd4_lease = 90; /* default lease time */ time_t nfsd4_grace = 90; static time_t boot_time; -static stateid_t zerostateid; /* bits all 0 */ -static stateid_t onestateid; /* bits all 1 */ + +#define all_ones {{~0,~0},~0} +static const stateid_t one_stateid = { + .si_generation = ~0, + .si_opaque = all_ones, +}; +static const stateid_t zero_stateid = { + /* all fields zero */ +}; + static u64 current_sessionid = 1; -#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) -#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) +#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) +#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) /* forward declarations */ static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); @@ -4564,7 +4572,6 @@ nfs4_state_init(void) } for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); - memset(&onestateid, ~0, sizeof(stateid_t)); INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); -- cgit v1.2.3 From c07c3d193412bbf4e9f405e75dc84e35e77fac28 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 13 Dec 2011 11:58:48 +0100 Subject: fuse: llseek optimize SEEK_CUR and SEEK_SET Use generic_file_llseek() instead of open coding the seek function. i_mutex protection is only necessary for SEEK_END (and SEEK_HOLE, SEEK_DATA), so move SEEK_CUR and SEEK_SET out from under i_mutex. 
Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 48 ++++++++---------------------------------------- 1 file changed, 8 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 0c84100acd4..e0b60acfe72 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1555,48 +1555,16 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin) loff_t retval; struct inode *inode = file->f_path.dentry->d_inode; - mutex_lock(&inode->i_mutex); - if (origin != SEEK_CUR && origin != SEEK_SET) { - retval = fuse_update_attributes(inode, NULL, file, NULL); - if (retval) - goto exit; - } + /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */ + if (origin == SEEK_CUR || origin == SEEK_SET) + return generic_file_llseek(file, offset, origin); - switch (origin) { - case SEEK_END: - offset += i_size_read(inode); - break; - case SEEK_CUR: - if (offset == 0) { - retval = file->f_pos; - goto exit; - } - offset += file->f_pos; - break; - case SEEK_DATA: - if (offset >= i_size_read(inode)) { - retval = -ENXIO; - goto exit; - } - break; - case SEEK_HOLE: - if (offset >= i_size_read(inode)) { - retval = -ENXIO; - goto exit; - } - offset = i_size_read(inode); - break; - } - retval = -EINVAL; - if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } - retval = offset; - } -exit: + mutex_lock(&inode->i_mutex); + retval = fuse_update_attributes(inode, NULL, file, NULL); + if (!retval) + retval = generic_file_llseek(file, offset, origin); mutex_unlock(&inode->i_mutex); + return retval; } -- cgit v1.2.3 From c411cc88d873b3f68635a04691f7f115c46bc39e Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Tue, 29 Nov 2011 22:08:00 +0100 Subject: fuse: Use kcalloc instead of kzalloc to allocate array The advantage of kcalloc is that it will prevent integer overflows which could result from the multiplication of the number of elements and size, and it is also a bit nicer to read. The semantic patch that makes this change is available in https://lkml.org/lkml/2011/11/25/107 Signed-off-by: Thomas Meyer Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e0b60acfe72..c297425cba7 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1776,7 +1776,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); err = -ENOMEM; - pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); + pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL); iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); if (!pages || !iov_page) goto out; -- cgit v1.2.3 From b18da0c56e9ff43a007b6c8e302c62e720964151 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 13 Dec 2011 11:58:49 +0100 Subject: fuse: support ioctl on directories Multiplexing filesystems may want to support ioctls on the underlying files and directories (e.g. FS_IOC_{GET,SET}FLAGS). Ioctl support on directories was missing so add it now. 
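To make the directory-ioctl use case concrete, here is a small user-space sketch of the sort of call this change allows a FUSE server to see (the helper name is made up and error handling is minimal; treat it as an illustration, not part of the patch):

/* Hypothetical illustration: issue FS_IOC_GETFLAGS on a directory fd.
 * With FUSE_IOCTL_DIR support the kernel can forward this to the daemon. */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

static int get_dir_flags(const char *path, long *flags)
{
        int fd = open(path, O_RDONLY | O_DIRECTORY);
        int ret;

        if (fd < 0)
                return -1;
        ret = ioctl(fd, FS_IOC_GETFLAGS, flags);  /* attr flags returned via *flags */
        close(fd);
        return ret;
}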
Reported-by: Antonio SJ Musumeci Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 26 ++++++++++++++++++++++++++ fs/fuse/file.c | 8 ++++---- fs/fuse/fuse_i.h | 2 ++ include/linux/fuse.h | 7 ++++++- 4 files changed, 38 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 9f63e493a9b..344577933f6 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1182,6 +1182,30 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, return fuse_fsync_common(file, start, end, datasync, 1); } +static long fuse_dir_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); + + /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */ + if (fc->minor < 18) + return -ENOTTY; + + return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR); +} + +static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); + + if (fc->minor < 18) + return -ENOTTY; + + return fuse_ioctl_common(file, cmd, arg, + FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); +} + static bool update_mtime(unsigned ivalid) { /* Always update if mtime is explicitly set */ @@ -1596,6 +1620,8 @@ static const struct file_operations fuse_dir_operations = { .open = fuse_dir_open, .release = fuse_dir_release, .fsync = fuse_dir_fsync, + .unlocked_ioctl = fuse_dir_ioctl, + .compat_ioctl = fuse_dir_compat_ioctl, }; static const struct inode_operations fuse_common_inode_operations = { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c297425cba7..4a199fd93fb 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1926,8 +1926,8 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, } EXPORT_SYMBOL_GPL(fuse_do_ioctl); -static long fuse_file_ioctl_common(struct file *file, unsigned int cmd, - unsigned long arg, unsigned int flags) +long fuse_ioctl_common(struct file *file, unsigned int cmd, + unsigned long arg, unsigned int flags) { struct inode *inode = file->f_dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); @@ -1944,13 +1944,13 @@ static long fuse_file_ioctl_common(struct file *file, unsigned int cmd, static long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - return fuse_file_ioctl_common(file, cmd, arg, 0); + return fuse_ioctl_common(file, cmd, arg, 0); } static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); + return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); } /* diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index cf6db0a9321..09337bcc255 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -765,6 +765,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int write); long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags); +long fuse_ioctl_common(struct file *file, unsigned int cmd, + unsigned long arg, unsigned int flags); unsigned fuse_file_poll(struct file *file, poll_table *wait); int fuse_dev_release(struct inode *inode, struct file *file); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 464cff52686..446c89718b9 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -50,6 +50,9 @@ * * 7.17 * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK + * + * 7.18 + * - add FUSE_IOCTL_DIR flag */ #ifndef _LINUX_FUSE_H @@ -81,7 +84,7 @@ #define 
FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 17 +#define FUSE_KERNEL_MINOR_VERSION 18 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -214,6 +217,7 @@ struct fuse_file_lock { * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed * FUSE_IOCTL_RETRY: retry with new iovecs * FUSE_IOCTL_32BIT: 32bit ioctl + * FUSE_IOCTL_DIR: is a directory * * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs */ @@ -221,6 +225,7 @@ struct fuse_file_lock { #define FUSE_IOCTL_UNRESTRICTED (1 << 1) #define FUSE_IOCTL_RETRY (1 << 2) #define FUSE_IOCTL_32BIT (1 << 3) +#define FUSE_IOCTL_DIR (1 << 4) #define FUSE_IOCTL_MAX_IOV 256 -- cgit v1.2.3 From 451d0f599934fd97faf54a5d7954b518e66192cb Mon Sep 17 00:00:00 2001 From: John Muir Date: Tue, 6 Dec 2011 21:50:06 +0100 Subject: FUSE: Notifying the kernel of deletion. Allows a FUSE file-system to tell the kernel when a file or directory is deleted. If the specified dentry has the specified inode number, the kernel will unhash it. The current 'fuse_notify_inval_entry' does not properly cause the kernel to clean up directories that are in use, and as a result the users of those directories see incorrect semantics from the file-system. The error condition seen when 'fuse_notify_inval_entry' is used to notify of a deleted directory is avoided when 'fuse_notify_delete' is used instead. The following scenario demonstrates the difference: 1. User A chdirs into 'testdir' and starts reading 'testfile'. 2. User B runs rm -rf 'testdir'. 3. User B creates 'testdir'. 4. User C chdirs into 'testdir'. If you run the above within the same machine on any file-system (including fuse file-systems), there is no problem: user C is able to chdir into the new testdir. The old testdir is removed from the dentry tree, but is still open by user A. If operations 2 and 3 are performed via the network such that the fuse file-system uses one of the notify functions to tell the kernel that the nodes are gone, then the following error occurs for user C while user A holds the original directory open: muirj@empacher:~> ls /test/testdir ls: cannot access /test/testdir: No such file or directory The issue here is that the kernel still has a dentry for testdir, and so it is requesting the attributes for the old directory, while the file-system is responding that the directory no longer exists. If, on the other hand, the file-system can notify the kernel that the directory is deleted using the new 'fuse_notify_delete' function, then the above ls will find the new directory as expected. 
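For orientation, a rough daemon-side sketch of how such a deletion notification is pushed over /dev/fuse (this assumes raw channel access rather than a library helper, and the function name is invented; the message layout follows the struct added by this patch):

/* Hypothetical sketch: send FUSE_NOTIFY_DELETE for (parent, child, name).
 * Notifications use unique == 0 and carry the notify code in hdr.error;
 * the name is written including its terminating NUL. */
#include <stdint.h>
#include <string.h>
#include <sys/uio.h>
#include <linux/fuse.h>

static int notify_delete(int fuse_fd, uint64_t parent, uint64_t child,
                         const char *name)
{
        struct fuse_out_header hdr;
        struct fuse_notify_delete_out arg;
        struct iovec iov[3];

        memset(&hdr, 0, sizeof(hdr));
        memset(&arg, 0, sizeof(arg));
        arg.parent = parent;
        arg.child = child;
        arg.namelen = strlen(name);
        hdr.unique = 0;                    /* 0 marks an unsolicited notification */
        hdr.error = FUSE_NOTIFY_DELETE;    /* notify code travels in the error field */
        hdr.len = sizeof(hdr) + sizeof(arg) + arg.namelen + 1;

        iov[0] = (struct iovec){ &hdr, sizeof(hdr) };
        iov[1] = (struct iovec){ &arg, sizeof(arg) };
        iov[2] = (struct iovec){ (void *)name, arg.namelen + 1 };
        return writev(fuse_fd, iov, 3) < 0 ? -1 : 0;
}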
Signed-off-by: John Muir Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++- fs/fuse/dir.c | 32 +++++++++++++++++++++++++++-- fs/fuse/fuse_i.h | 8 +++++++- include/linux/fuse.h | 9 +++++++++ 4 files changed, 102 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 2aaf3eaaf13..5f3368ab0fa 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1378,7 +1378,59 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, down_read(&fc->killsb); err = -ENOENT; if (fc->sb) - err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name); + err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name); + up_read(&fc->killsb); + kfree(buf); + return err; + +err: + kfree(buf); + fuse_copy_finish(cs); + return err; +} + +static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_delete_out outarg; + int err = -ENOMEM; + char *buf; + struct qstr name; + + buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL); + if (!buf) + goto err; + + err = -EINVAL; + if (size < sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + + err = -ENAMETOOLONG; + if (outarg.namelen > FUSE_NAME_MAX) + goto err; + + err = -EINVAL; + if (size != sizeof(outarg) + outarg.namelen + 1) + goto err; + + name.name = buf; + name.len = outarg.namelen; + err = fuse_copy_one(cs, buf, outarg.namelen + 1); + if (err) + goto err; + fuse_copy_finish(cs); + buf[outarg.namelen] = 0; + name.hash = full_name_hash(name.name, name.len); + + down_read(&fc->killsb); + err = -ENOENT; + if (fc->sb) + err = fuse_reverse_inval_entry(fc->sb, outarg.parent, + outarg.child, &name); up_read(&fc->killsb); kfree(buf); return err; @@ -1597,6 +1649,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, case FUSE_NOTIFY_RETRIEVE: return fuse_notify_retrieve(fc, size, cs); + case FUSE_NOTIFY_DELETE: + return fuse_notify_delete(fc, size, cs); + default: fuse_copy_finish(cs); return -EINVAL; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 344577933f6..bef8c3011d3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -868,7 +868,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, } int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, - struct qstr *name) + u64 child_nodeid, struct qstr *name) { int err = -ENOTDIR; struct inode *parent; @@ -895,8 +895,36 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, fuse_invalidate_attr(parent); fuse_invalidate_entry(entry); + + if (child_nodeid != 0 && entry->d_inode) { + mutex_lock(&entry->d_inode->i_mutex); + if (get_node_id(entry->d_inode) != child_nodeid) { + err = -ENOENT; + goto badentry; + } + if (d_mountpoint(entry)) { + err = -EBUSY; + goto badentry; + } + if (S_ISDIR(entry->d_inode->i_mode)) { + shrink_dcache_parent(entry); + if (!simple_empty(entry)) { + err = -ENOTEMPTY; + goto badentry; + } + entry->d_inode->i_flags |= S_DEAD; + } + dont_mount(entry); + clear_nlink(entry->d_inode); + err = 0; + badentry: + mutex_unlock(&entry->d_inode->i_mutex); + if (!err) + d_delete(entry); + } else { + err = 0; + } dput(entry); - err = 0; unlock: mutex_unlock(&parent->i_mutex); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 09337bcc255..a571584a091 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -755,9 +755,15 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, /** * File-system tells the 
kernel to invalidate parent attributes and * the dentry matching parent/name. + * + * If the child_nodeid is non-zero and: + * - matches the inode number for the dentry matching parent/name, + * - is not a mount point + * - is a file or oan empty directory + * then the dentry is unhashed (d_delete()). */ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, - struct qstr *name); + u64 child_nodeid, struct qstr *name); int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 446c89718b9..8ba2c9460b2 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -53,6 +53,7 @@ * * 7.18 * - add FUSE_IOCTL_DIR flag + * - add FUSE_NOTIFY_DELETE */ #ifndef _LINUX_FUSE_H @@ -288,6 +289,7 @@ enum fuse_notify_code { FUSE_NOTIFY_INVAL_ENTRY = 3, FUSE_NOTIFY_STORE = 4, FUSE_NOTIFY_RETRIEVE = 5, + FUSE_NOTIFY_DELETE = 6, FUSE_NOTIFY_CODE_MAX, }; @@ -611,6 +613,13 @@ struct fuse_notify_inval_entry_out { __u32 padding; }; +struct fuse_notify_delete_out { + __u64 parent; + __u64 child; + __u32 namelen; + __u32 padding; +}; + struct fuse_notify_store_out { __u64 nodeid; __u64 offset; -- cgit v1.2.3 From 1ff97647f066aef72ae68042c9abc4a837a12e6d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 13 Dec 2011 11:06:49 -0800 Subject: char_dev.c: fix up some whitespace errors Remove some minor whitespace errors (2 trailing spaces, and one space needed for a comma) to make the file checkpatch.pl clean with the exception of the exports, which is fine for now. Signed-off-by: Greg Kroah-Hartman --- fs/char_dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/char_dev.c b/fs/char_dev.c index dca9e5e0f73..3f152b92a94 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -272,7 +272,7 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, cd = __register_chrdev_region(major, baseminor, count, name); if (IS_ERR(cd)) return PTR_ERR(cd); - + cdev = cdev_alloc(); if (!cdev) goto out2; @@ -280,7 +280,7 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, cdev->owner = fops->owner; cdev->ops = fops; kobject_set_name(&cdev->kobj, "%s", name); - + err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); if (err) goto out; @@ -405,7 +405,7 @@ static int chrdev_open(struct inode *inode, struct file *filp) goto out_cdev_put; if (filp->f_op->open) { - ret = filp->f_op->open(inode,filp); + ret = filp->f_op->open(inode, filp); if (ret) goto out_cdev_put; } -- cgit v1.2.3 From 39c4cc0fcc38cff29d5b9884372b17c894c9c080 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 13 Dec 2011 16:35:58 -0500 Subject: NFSD: Only reinitialize the recall_lru list under the recall lock unhash_delegation() will grab the recall lock before calling list_del_init() in each of these places. This patch removes the redundant calls. Signed-off-by: Bryan Schumaker Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 213da7b7e7d..19ca9b54200 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1066,7 +1066,6 @@ expire_client(struct nfs4_client *clp) spin_unlock(&recall_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } while (!list_empty(&clp->cl_openowners)) { @@ -3133,7 +3132,6 @@ nfs4_laundromat(void) spin_unlock(&recall_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } test_val = nfsd4_lease; @@ -4674,7 +4672,6 @@ __nfs4_state_shutdown(void) spin_unlock(&recall_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } -- cgit v1.2.3 From 6e736be7f282fff705db7c34a15313281b372a76 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Dec 2011 00:33:38 +0100 Subject: block: make ioc get/put interface more conventional and fix race on allocation Ignoring copy_io() during fork, io_context can be allocated from two places - current_io_context() and set_task_ioprio(). The former is always called from the local task while the latter can be called from a different task. The synchronization between them is peculiar and dubious. * current_io_context() doesn't grab task_lock() and assumes that if it saw %NULL ->io_context, it would stay that way until allocation and assignment is complete. It has smp_wmb() between alloc/init and assignment. * set_task_ioprio() grabs task_lock() for assignment and does smp_read_barrier_depends() between "ioc = task->io_context" and "if (ioc)". Unfortunately, this doesn't achieve anything - the latter is not a dependent load of the former. i.e., if ioc itself were being dereferenced "ioc->xxx", it would mean something (not sure what, though) but as the code currently stands, the dependent read barrier is a noop. As only one of the two test-assignment sequences is task_lock() protected, the task_lock() can't do much about the race between the two. Nothing prevents current_io_context() and set_task_ioprio() from each allocating their own ioc for the same task and overwriting the other's. Also, set_task_ioprio() can race with an exiting task and create a new ioc after exit_io_context() is finished. ioc get/put doesn't have any reason to be complex. The only hot path is accessing the existing ioc of %current, which is simple to achieve given that ->io_context is never destroyed as long as the task is alive. All other paths can happily go through task_lock() like all other task sub-structures without impacting anything. This patch updates ioc get/put so that it becomes more conventional. * alloc_io_context() is replaced with get_task_io_context(). This is the only interface which can acquire access to the ioc of another task. On return, the caller has an explicit reference to the object which should be put using put_io_context() afterwards. * The functionality of current_io_context() remains the same but when creating a new ioc, it shares the code path with get_task_io_context() and always goes through task_lock(). * get_io_context() now means incrementing ref on an ioc which the caller already has access to (be that an explicit refcnt or implicit %current one). 
* PF_EXITING inhibits creation of new io_context and once exit_io_context() is finished, it's guaranteed that both ioc acquisition functions return %NULL. * All users are updated. Most are trivial but smp_read_barrier_depends() removal from cfq_get_io_context() needs a bit of explanation. I suppose the original intention was to ensure ioc->ioprio is visible when set_task_ioprio() allocates new io_context and installs it; however, this wouldn't have worked because set_task_ioprio() doesn't have wmb between init and install. There are other problems with this which will be fixed in another patch. * While at it, use NUMA_NO_NODE instead of -1 for wildcard node specification. -v2: Vivek spotted contamination from debug patch. Removed. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 9 +++-- block/blk-ioc.c | 99 +++++++++++++++++++++++++++++++---------------- block/blk.h | 1 + block/cfq-iosched.c | 18 ++++----- fs/ioprio.c | 21 ++-------- include/linux/iocontext.h | 4 +- kernel/fork.c | 8 ++-- 7 files changed, 91 insertions(+), 69 deletions(-) (limited to 'fs') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 8f630cec906..4b001dcd85b 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1645,11 +1645,12 @@ static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { struct io_context *ioc; - task_lock(tsk); - ioc = tsk->io_context; - if (ioc) + /* we don't lose anything even if ioc allocation fails */ + ioc = get_task_io_context(tsk, GFP_ATOMIC, NUMA_NO_NODE); + if (ioc) { ioc->cgroup_changed = 1; - task_unlock(tsk); + put_io_context(ioc); + } } void blkio_policy_register(struct blkio_policy_type *blkiop) diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 8bebf06bac7..b13ed96776c 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -16,6 +16,19 @@ */ static struct kmem_cache *iocontext_cachep; +/** + * get_io_context - increment reference count to io_context + * @ioc: io_context to get + * + * Increment reference count to @ioc. 
+ */ +void get_io_context(struct io_context *ioc) +{ + BUG_ON(atomic_long_read(&ioc->refcount) <= 0); + atomic_long_inc(&ioc->refcount); +} +EXPORT_SYMBOL(get_io_context); + static void cfq_dtor(struct io_context *ioc) { if (!hlist_empty(&ioc->cic_list)) { @@ -71,6 +84,9 @@ void exit_io_context(struct task_struct *task) { struct io_context *ioc; + /* PF_EXITING prevents new io_context from being attached to @task */ + WARN_ON_ONCE(!(current->flags & PF_EXITING)); + task_lock(task); ioc = task->io_context; task->io_context = NULL; @@ -82,7 +98,9 @@ void exit_io_context(struct task_struct *task) put_io_context(ioc); } -struct io_context *alloc_io_context(gfp_t gfp_flags, int node) +static struct io_context *create_task_io_context(struct task_struct *task, + gfp_t gfp_flags, int node, + bool take_ref) { struct io_context *ioc; @@ -98,6 +116,20 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ioc->cic_list); + /* try to install, somebody might already have beaten us to it */ + task_lock(task); + + if (!task->io_context && !(task->flags & PF_EXITING)) { + task->io_context = ioc; + } else { + kmem_cache_free(iocontext_cachep, ioc); + ioc = task->io_context; + } + + if (ioc && take_ref) + get_io_context(ioc); + + task_unlock(task); return ioc; } @@ -114,46 +146,47 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) */ struct io_context *current_io_context(gfp_t gfp_flags, int node) { - struct task_struct *tsk = current; - struct io_context *ret; - - ret = tsk->io_context; - if (likely(ret)) - return ret; - - ret = alloc_io_context(gfp_flags, node); - if (ret) { - /* make sure set_task_ioprio() sees the settings above */ - smp_wmb(); - tsk->io_context = ret; - } + might_sleep_if(gfp_flags & __GFP_WAIT); - return ret; + if (current->io_context) + return current->io_context; + + return create_task_io_context(current, gfp_flags, node, false); } +EXPORT_SYMBOL(current_io_context); -/* - * If the current task has no IO context then create one and initialise it. - * If it does have a context, take a ref on it. +/** + * get_task_io_context - get io_context of a task + * @task: task of interest + * @gfp_flags: allocation flags, used if allocation is necessary + * @node: allocation node, used if allocation is necessary + * + * Return io_context of @task. If it doesn't exist, it is created with + * @gfp_flags and @node. The returned io_context has its reference count + * incremented. * - * This is always called in the context of the task which submitted the I/O. + * This function always goes through task_lock() and it's better to use + * current_io_context() + get_io_context() for %current. */ -struct io_context *get_io_context(gfp_t gfp_flags, int node) +struct io_context *get_task_io_context(struct task_struct *task, + gfp_t gfp_flags, int node) { - struct io_context *ioc = NULL; - - /* - * Check for unlikely race with exiting task. ioc ref count is - * zero when ioc is being detached. 
- */ - do { - ioc = current_io_context(gfp_flags, node); - if (unlikely(!ioc)) - break; - } while (!atomic_long_inc_not_zero(&ioc->refcount)); + struct io_context *ioc; - return ioc; + might_sleep_if(gfp_flags & __GFP_WAIT); + + task_lock(task); + ioc = task->io_context; + if (likely(ioc)) { + get_io_context(ioc); + task_unlock(task); + return ioc; + } + task_unlock(task); + + return create_task_io_context(task, gfp_flags, node, true); } -EXPORT_SYMBOL(get_io_context); +EXPORT_SYMBOL(get_task_io_context); static int __init blk_ioc_init(void) { diff --git a/block/blk.h b/block/blk.h index aae4d88fc52..fc3c41b2fd2 100644 --- a/block/blk.h +++ b/block/blk.h @@ -122,6 +122,7 @@ static inline int blk_should_fake_timeout(struct request_queue *q) } #endif +void get_io_context(struct io_context *ioc); struct io_context *current_io_context(gfp_t gfp_flags, int node); int ll_back_merge_fn(struct request_queue *q, struct request *req, diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ec3f5e8ba56..d42d89ccce1 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -14,6 +14,7 @@ #include #include #include +#include "blk.h" #include "cfq.h" /* @@ -3194,13 +3195,13 @@ static struct cfq_io_context * cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) { struct io_context *ioc = NULL; - struct cfq_io_context *cic; + struct cfq_io_context *cic = NULL; might_sleep_if(gfp_mask & __GFP_WAIT); - ioc = get_io_context(gfp_mask, cfqd->queue->node); + ioc = current_io_context(gfp_mask, cfqd->queue->node); if (!ioc) - return NULL; + goto err; cic = cfq_cic_lookup(cfqd, ioc); if (cic) @@ -3211,10 +3212,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) goto err; if (cfq_cic_link(cfqd, ioc, cic, gfp_mask)) - goto err_free; - + goto err; out: - smp_read_barrier_depends(); + get_io_context(ioc); + if (unlikely(ioc->ioprio_changed)) cfq_ioc_set_ioprio(ioc); @@ -3223,10 +3224,9 @@ out: cfq_ioc_set_cgroup(ioc); #endif return cic; -err_free: - cfq_cic_free(cic); err: - put_io_context(ioc); + if (cic) + cfq_cic_free(cic); return NULL; } diff --git a/fs/ioprio.c b/fs/ioprio.c index f79dab83e17..998ec239d1e 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -48,28 +48,13 @@ int set_task_ioprio(struct task_struct *task, int ioprio) if (err) return err; - task_lock(task); - do { - ioc = task->io_context; - /* see wmb() in current_io_context() */ - smp_read_barrier_depends(); - if (ioc) - break; - - ioc = alloc_io_context(GFP_ATOMIC, -1); - if (!ioc) { - err = -ENOMEM; - break; - } - task->io_context = ioc; - } while (1); - - if (!err) { + ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); + if (ioc) { ioc->ioprio = ioprio; ioc->ioprio_changed = 1; + put_io_context(ioc); } - task_unlock(task); return err; } EXPORT_SYMBOL_GPL(set_task_ioprio); diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 8a6ecb66346..28bb621ef5a 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -78,8 +78,8 @@ struct task_struct; #ifdef CONFIG_BLOCK void put_io_context(struct io_context *ioc); void exit_io_context(struct task_struct *task); -struct io_context *get_io_context(gfp_t gfp_flags, int node); -struct io_context *alloc_io_context(gfp_t gfp_flags, int node); +struct io_context *get_task_io_context(struct task_struct *task, + gfp_t gfp_flags, int node); #else struct io_context; static inline void put_io_context(struct io_context *ioc) { } diff --git a/kernel/fork.c b/kernel/fork.c index da4a6a10d08..5bcfc739bb7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -870,6 
+870,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk) { #ifdef CONFIG_BLOCK struct io_context *ioc = current->io_context; + struct io_context *new_ioc; if (!ioc) return 0; @@ -881,11 +882,12 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk) if (unlikely(!tsk->io_context)) return -ENOMEM; } else if (ioprio_valid(ioc->ioprio)) { - tsk->io_context = alloc_io_context(GFP_KERNEL, -1); - if (unlikely(!tsk->io_context)) + new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE); + if (unlikely(!new_ioc)) return -ENOMEM; - tsk->io_context->ioprio = ioc->ioprio; + new_ioc->ioprio = ioc->ioprio; + put_io_context(new_ioc); } #endif return 0; -- cgit v1.2.3 From dc86900e0a8f665122de6faadd27fb4c6d2b3e4d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Dec 2011 00:33:38 +0100 Subject: block, cfq: move ioc ioprio/cgroup changed handling to cic ioprio/cgroup change was handled by marking the changed state in ioc and, on the following access to the ioc, performing RCU-protected iteration through all cic's grabbing the matching queue_lock. This patch moves the changed state to each cic. When ioprio or cgroup changes, the respective bit is set on all cic's of the ioc and when each of those cic (not ioc) is accessed, change is applied for that specific ioc-queue pair. This also fixes the following two race conditions between setting and clearing of changed states. * Missing barrier between assign/load of ioprio and ioprio_changed allowed applying old ioprio. * Change requests could happen between application of change and clearing of changed variables. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 +- block/blk-ioc.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ block/cfq-iosched.c | 28 +++++++++------------------- fs/ioprio.c | 3 +-- include/linux/iocontext.h | 14 +++++++++----- 5 files changed, 65 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4b001dcd85b..dc00835aab6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1648,7 +1648,7 @@ static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk) /* we don't lose anything even if ioc allocation fails */ ioc = get_task_io_context(tsk, GFP_ATOMIC, NUMA_NO_NODE); if (ioc) { - ioc->cgroup_changed = 1; + ioc_cgroup_changed(ioc); put_io_context(ioc); } } diff --git a/block/blk-ioc.c b/block/blk-ioc.c index b13ed96776c..6f59fbad93d 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -188,6 +188,51 @@ struct io_context *get_task_io_context(struct task_struct *task, } EXPORT_SYMBOL(get_task_io_context); +void ioc_set_changed(struct io_context *ioc, int which) +{ + struct cfq_io_context *cic; + struct hlist_node *n; + + hlist_for_each_entry(cic, n, &ioc->cic_list, cic_list) + set_bit(which, &cic->changed); +} + +/** + * ioc_ioprio_changed - notify ioprio change + * @ioc: io_context of interest + * @ioprio: new ioprio + * + * @ioc's ioprio has changed to @ioprio. Set %CIC_IOPRIO_CHANGED for all + * cic's. iosched is responsible for checking the bit and applying it on + * request issue path. + */ +void ioc_ioprio_changed(struct io_context *ioc, int ioprio) +{ + unsigned long flags; + + spin_lock_irqsave(&ioc->lock, flags); + ioc->ioprio = ioprio; + ioc_set_changed(ioc, CIC_IOPRIO_CHANGED); + spin_unlock_irqrestore(&ioc->lock, flags); +} + +/** + * ioc_cgroup_changed - notify cgroup change + * @ioc: io_context of interest + * + * @ioc's cgroup has changed. 
Set %CIC_CGROUP_CHANGED for all cic's. + * iosched is responsible for checking the bit and applying it on request + * issue path. + */ +void ioc_cgroup_changed(struct io_context *ioc) +{ + unsigned long flags; + + spin_lock_irqsave(&ioc->lock, flags); + ioc_set_changed(ioc, CIC_CGROUP_CHANGED); + spin_unlock_irqrestore(&ioc->lock, flags); +} + static int __init blk_ioc_init(void) { iocontext_cachep = kmem_cache_create("blkdev_ioc", diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index a612ca65f37..51aece2eea7 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2904,7 +2904,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) cfq_clear_cfqq_prio_changed(cfqq); } -static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) +static void changed_ioprio(struct cfq_io_context *cic) { struct cfq_data *cfqd = cic_to_cfqd(cic); struct cfq_queue *cfqq; @@ -2933,12 +2933,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } -static void cfq_ioc_set_ioprio(struct io_context *ioc) -{ - call_for_each_cic(ioc, changed_ioprio); - ioc->ioprio_changed = 0; -} - static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, pid_t pid, bool is_sync) { @@ -2960,7 +2954,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, } #ifdef CONFIG_CFQ_GROUP_IOSCHED -static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) +static void changed_cgroup(struct cfq_io_context *cic) { struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1); struct cfq_data *cfqd = cic_to_cfqd(cic); @@ -2986,12 +2980,6 @@ static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) spin_unlock_irqrestore(q->queue_lock, flags); } - -static void cfq_ioc_set_cgroup(struct io_context *ioc) -{ - call_for_each_cic(ioc, changed_cgroup); - ioc->cgroup_changed = 0; -} #endif /* CONFIG_CFQ_GROUP_IOSCHED */ static struct cfq_queue * @@ -3222,13 +3210,15 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) out: get_io_context(ioc); - if (unlikely(ioc->ioprio_changed)) - cfq_ioc_set_ioprio(ioc); - + if (unlikely(cic->changed)) { + if (test_and_clear_bit(CIC_IOPRIO_CHANGED, &cic->changed)) + changed_ioprio(cic); #ifdef CONFIG_CFQ_GROUP_IOSCHED - if (unlikely(ioc->cgroup_changed)) - cfq_ioc_set_cgroup(ioc); + if (test_and_clear_bit(CIC_CGROUP_CHANGED, &cic->changed)) + changed_cgroup(cic); #endif + } + return cic; err: if (cic) diff --git a/fs/ioprio.c b/fs/ioprio.c index 998ec239d1e..0f1b9515213 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -50,8 +50,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); if (ioc) { - ioc->ioprio = ioprio; - ioc->ioprio_changed = 1; + ioc_ioprio_changed(ioc, ioprio); put_io_context(ioc); } diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 079aea8fd8a..2c2b6da96b3 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -13,6 +13,11 @@ struct cfq_ttime { unsigned long ttime_mean; }; +enum { + CIC_IOPRIO_CHANGED, + CIC_CGROUP_CHANGED, +}; + struct cfq_io_context { void *key; struct request_queue *q; @@ -26,6 +31,8 @@ struct cfq_io_context { struct list_head queue_list; struct hlist_node cic_list; + unsigned long changed; + void (*dtor)(struct io_context *); /* destructor */ void (*exit)(struct io_context *); /* called on task exit */ @@ -44,11 +51,6 @@ struct io_context { spinlock_t lock; 
unsigned short ioprio; - unsigned short ioprio_changed; - -#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) - unsigned short cgroup_changed; -#endif /* * For request batching @@ -81,6 +83,8 @@ void put_io_context(struct io_context *ioc); void exit_io_context(struct task_struct *task); struct io_context *get_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node); +void ioc_ioprio_changed(struct io_context *ioc, int ioprio); +void ioc_cgroup_changed(struct io_context *ioc); #else struct io_context; static inline void put_io_context(struct io_context *ioc) { } -- cgit v1.2.3 From b2efa05265d62bc29f3a64400fad4b44340eedb8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Dec 2011 00:33:39 +0100 Subject: block, cfq: unlink cfq_io_context's immediately cic is association between io_context and request_queue. A cic is linked from both ioc and q and should be destroyed when either one goes away. As ioc and q both have their own locks, locking becomes a bit complex - both orders work for removal from one but not from the other. Currently, cfq tries to circumvent this locking order issue with RCU. ioc->lock nests inside queue_lock but the radix tree and cic's are also protected by RCU allowing either side to walk their lists without grabbing lock. This rather unconventional use of RCU quickly devolves into extremely fragile convolution. e.g. The following is from cfqd going away too soon after ioc and q exits raced. general protection fault: 0000 [#1] PREEMPT SMP CPU 2 Modules linked in: [ 88.503444] Pid: 599, comm: hexdump Not tainted 3.1.0-rc10-work+ #158 Bochs Bochs RIP: 0010:[] [] cfq_exit_single_io_context+0x58/0xf0 ... Call Trace: [] call_for_each_cic+0x5a/0x90 [] cfq_exit_io_context+0x15/0x20 [] exit_io_context+0x100/0x140 [] do_exit+0x579/0x850 [] do_group_exit+0x5b/0xd0 [] sys_exit_group+0x17/0x20 [] system_call_fastpath+0x16/0x1b The only real hot path here is cic lookup during request initialization and avoiding extra locking requires very confined use of RCU. This patch makes cic removal from both ioc and request_queue perform double-locking and unlink immediately. * From q side, the change is almost trivial as ioc->lock nests inside queue_lock. It just needs to grab each ioc->lock as it walks cic_list and unlink it. * From ioc side, it's a bit more difficult because of inversed lock order. ioc needs its lock to walk its cic_list but can't grab the matching queue_lock and needs to perform unlock-relock dancing. Unlinking is now wholly done from put_io_context() and fast path is optimized by using the queue_lock the caller already holds, which is by far the most common case. If the ioc accessed multiple devices, it tries with trylock. In unlikely cases of fast path failure, it falls back to full double-locking dance from workqueue. Double-locking isn't the prettiest thing in the world but it's *far* simpler and more understandable than RCU trick without adding any meaningful overhead. This still leaves a lot of now unnecessary RCU logics. Future patches will trim them. -v2: Vivek pointed out that cic->q was being dereferenced after cic->release() was called. Updated to use local variable @this_q instead. 
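To make the locking scheme above concrete, here is a minimal sketch of the "fast path with trylock, slow path via workqueue" release pattern the patch describes. The names (my_obj, my_obj_put, do_release) are illustrative only and are not part of the patch; the real implementation is the put_io_context()/ioc_release_fn() pair in the diff below, which additionally handles irq-safe locking, lockdep subclasses and walking cic's on multiple queues.

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct my_obj {
	atomic_t		refcount;
	spinlock_t		inner_lock;	/* plays the role of ioc->lock */
	spinlock_t		*outer_lock;	/* plays the role of queue_lock */
	struct work_struct	release_work;	/* slow-path release */
};

static void do_release(struct my_obj *obj)
{
	/* unlink obj from the structure protected by the outer lock */
}

static void my_obj_put(struct my_obj *obj, bool outer_already_held)
{
	if (!atomic_dec_and_test(&obj->refcount))
		return;

	/* the inner lock nests inside the outer one, so it is always safe */
	spin_lock(&obj->inner_lock);

	if (outer_already_held || spin_trylock(obj->outer_lock)) {
		/* fast path: both locks held, release in place */
		do_release(obj);
		if (!outer_already_held)
			spin_unlock(obj->outer_lock);
		spin_unlock(&obj->inner_lock);
		return;
	}

	/*
	 * Inverted order and trylock failed: punt to process context,
	 * where the work item can take the outer lock first and then
	 * the inner lock in the correct order.
	 */
	spin_unlock(&obj->inner_lock);
	schedule_work(&obj->release_work);
}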
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 +- block/blk-ioc.c | 166 ++++++++++++++++++++++++++++++++++++++-------- block/cfq-iosched.c | 44 +++--------- fs/ioprio.c | 2 +- include/linux/blkdev.h | 3 + include/linux/iocontext.h | 12 ++-- kernel/fork.c | 2 +- 7 files changed, 159 insertions(+), 72 deletions(-) (limited to 'fs') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index dc00835aab6..27886935804 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1649,7 +1649,7 @@ static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk) ioc = get_task_io_context(tsk, GFP_ATOMIC, NUMA_NO_NODE); if (ioc) { ioc_cgroup_changed(ioc); - put_io_context(ioc); + put_io_context(ioc, NULL); } } diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 6f59fbad93d..fb23965595d 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -29,55 +29,164 @@ void get_io_context(struct io_context *ioc) } EXPORT_SYMBOL(get_io_context); -static void cfq_dtor(struct io_context *ioc) +/* + * Releasing ioc may nest into another put_io_context() leading to nested + * fast path release. As the ioc's can't be the same, this is okay but + * makes lockdep whine. Keep track of nesting and use it as subclass. + */ +#ifdef CONFIG_LOCKDEP +#define ioc_release_depth(q) ((q) ? (q)->ioc_release_depth : 0) +#define ioc_release_depth_inc(q) (q)->ioc_release_depth++ +#define ioc_release_depth_dec(q) (q)->ioc_release_depth-- +#else +#define ioc_release_depth(q) 0 +#define ioc_release_depth_inc(q) do { } while (0) +#define ioc_release_depth_dec(q) do { } while (0) +#endif + +/* + * Slow path for ioc release in put_io_context(). Performs double-lock + * dancing to unlink all cic's and then frees ioc. + */ +static void ioc_release_fn(struct work_struct *work) { - if (!hlist_empty(&ioc->cic_list)) { - struct cfq_io_context *cic; + struct io_context *ioc = container_of(work, struct io_context, + release_work); + struct request_queue *last_q = NULL; + + spin_lock_irq(&ioc->lock); + + while (!hlist_empty(&ioc->cic_list)) { + struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first, + struct cfq_io_context, + cic_list); + struct request_queue *this_q = cic->q; + + if (this_q != last_q) { + /* + * Need to switch to @this_q. Once we release + * @ioc->lock, it can go away along with @cic. + * Hold on to it. + */ + __blk_get_queue(this_q); + + /* + * blk_put_queue() might sleep thanks to kobject + * idiocy. Always release both locks, put and + * restart. + */ + if (last_q) { + spin_unlock(last_q->queue_lock); + spin_unlock_irq(&ioc->lock); + blk_put_queue(last_q); + } else { + spin_unlock_irq(&ioc->lock); + } + + last_q = this_q; + spin_lock_irq(this_q->queue_lock); + spin_lock(&ioc->lock); + continue; + } + ioc_release_depth_inc(this_q); + cic->exit(cic); + cic->release(cic); + ioc_release_depth_dec(this_q); + } - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); - cic->dtor(ioc); + if (last_q) { + spin_unlock(last_q->queue_lock); + spin_unlock_irq(&ioc->lock); + blk_put_queue(last_q); + } else { + spin_unlock_irq(&ioc->lock); } + + kmem_cache_free(iocontext_cachep, ioc); } /** * put_io_context - put a reference of io_context * @ioc: io_context to put + * @locked_q: request_queue the caller is holding queue_lock of (hint) * * Decrement reference count of @ioc and release it if the count reaches - * zero. + * zero. If the caller is holding queue_lock of a queue, it can indicate + * that with @locked_q. 
This is an optimization hint and the caller is + * allowed to pass in %NULL even when it's holding a queue_lock. */ -void put_io_context(struct io_context *ioc) +void put_io_context(struct io_context *ioc, struct request_queue *locked_q) { + struct request_queue *last_q = locked_q; + unsigned long flags; + if (ioc == NULL) return; BUG_ON(atomic_long_read(&ioc->refcount) <= 0); + if (locked_q) + lockdep_assert_held(locked_q->queue_lock); if (!atomic_long_dec_and_test(&ioc->refcount)) return; - rcu_read_lock(); - cfq_dtor(ioc); - rcu_read_unlock(); - - kmem_cache_free(iocontext_cachep, ioc); -} -EXPORT_SYMBOL(put_io_context); + /* + * Destroy @ioc. This is a bit messy because cic's are chained + * from both ioc and queue, and ioc->lock nests inside queue_lock. + * The inner ioc->lock should be held to walk our cic_list and then + * for each cic the outer matching queue_lock should be grabbed. + * ie. We need to do reverse-order double lock dancing. + * + * Another twist is that we are often called with one of the + * matching queue_locks held as indicated by @locked_q, which + * prevents performing double-lock dance for other queues. + * + * So, we do it in two stages. The fast path uses the queue_lock + * the caller is holding and, if other queues need to be accessed, + * uses trylock to avoid introducing locking dependency. This can + * handle most cases, especially if @ioc was performing IO on only + * single device. + * + * If trylock doesn't cut it, we defer to @ioc->release_work which + * can do all the double-locking dancing. + */ + spin_lock_irqsave_nested(&ioc->lock, flags, + ioc_release_depth(locked_q)); + + while (!hlist_empty(&ioc->cic_list)) { + struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first, + struct cfq_io_context, + cic_list); + struct request_queue *this_q = cic->q; + + if (this_q != last_q) { + if (last_q && last_q != locked_q) + spin_unlock(last_q->queue_lock); + last_q = NULL; + + if (!spin_trylock(this_q->queue_lock)) + break; + last_q = this_q; + continue; + } + ioc_release_depth_inc(this_q); + cic->exit(cic); + cic->release(cic); + ioc_release_depth_dec(this_q); + } -static void cfq_exit(struct io_context *ioc) -{ - rcu_read_lock(); + if (last_q && last_q != locked_q) + spin_unlock(last_q->queue_lock); - if (!hlist_empty(&ioc->cic_list)) { - struct cfq_io_context *cic; + spin_unlock_irqrestore(&ioc->lock, flags); - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); - cic->exit(ioc); - } - rcu_read_unlock(); + /* if no cic's left, we're done; otherwise, kick release_work */ + if (hlist_empty(&ioc->cic_list)) + kmem_cache_free(iocontext_cachep, ioc); + else + schedule_work(&ioc->release_work); } +EXPORT_SYMBOL(put_io_context); /* Called by the exiting task */ void exit_io_context(struct task_struct *task) @@ -92,10 +201,8 @@ void exit_io_context(struct task_struct *task) task->io_context = NULL; task_unlock(task); - if (atomic_dec_and_test(&ioc->nr_tasks)) - cfq_exit(ioc); - - put_io_context(ioc); + atomic_dec(&ioc->nr_tasks); + put_io_context(ioc, NULL); } static struct io_context *create_task_io_context(struct task_struct *task, @@ -115,6 +222,7 @@ static struct io_context *create_task_io_context(struct task_struct *task, spin_lock_init(&ioc->lock); INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ioc->cic_list); + INIT_WORK(&ioc->release_work, ioc_release_fn); /* try to install, somebody might already have beaten us to it */ task_lock(task); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 
e617b088c59..6cc60656040 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1778,7 +1778,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqd->active_queue = NULL; if (cfqd->active_cic) { - put_io_context(cfqd->active_cic->ioc); + put_io_context(cfqd->active_cic->ioc, cfqd->queue); cfqd->active_cic = NULL; } } @@ -2812,38 +2812,6 @@ static void cfq_exit_cic(struct cfq_io_context *cic) } } -static void cfq_exit_single_io_context(struct io_context *ioc, - struct cfq_io_context *cic) -{ - struct cfq_data *cfqd = cic_to_cfqd(cic); - - if (cfqd) { - struct request_queue *q = cfqd->queue; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - - /* - * Ensure we get a fresh copy of the ->key to prevent - * race between exiting task and queue - */ - smp_read_barrier_depends(); - if (cic->key == cfqd) - cfq_exit_cic(cic); - - spin_unlock_irqrestore(q->queue_lock, flags); - } -} - -/* - * The process that ioc belongs to has exited, we need to clean up - * and put the internal structures we have that belongs to that process. - */ -static void cfq_exit_io_context(struct io_context *ioc) -{ - call_for_each_cic(ioc, cfq_exit_single_io_context); -} - static struct cfq_io_context * cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) { @@ -2855,8 +2823,8 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) cic->ttime.last_end_request = jiffies; INIT_LIST_HEAD(&cic->queue_list); INIT_HLIST_NODE(&cic->cic_list); - cic->dtor = cfq_free_io_context; - cic->exit = cfq_exit_io_context; + cic->exit = cfq_exit_cic; + cic->release = cfq_release_cic; elv_ioc_count_inc(cfq_ioc_count); } @@ -3726,7 +3694,7 @@ static void cfq_put_request(struct request *rq) BUG_ON(!cfqq->allocated[rw]); cfqq->allocated[rw]--; - put_io_context(RQ_CIC(rq)->ioc); + put_io_context(RQ_CIC(rq)->ioc, cfqq->cfqd->queue); rq->elevator_private[0] = NULL; rq->elevator_private[1] = NULL; @@ -3937,8 +3905,12 @@ static void cfq_exit_queue(struct elevator_queue *e) struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, struct cfq_io_context, queue_list); + struct io_context *ioc = cic->ioc; + spin_lock(&ioc->lock); cfq_exit_cic(cic); + cfq_release_cic(cic); + spin_unlock(&ioc->lock); } cfq_put_async_queues(cfqd); diff --git a/fs/ioprio.c b/fs/ioprio.c index 0f1b9515213..f84b380d65e 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); if (ioc) { ioc_ioprio_changed(ioc, ioprio); - put_io_context(ioc); + put_io_context(ioc, NULL); } return err; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d1b6f4ed1f9..65c2f8c7008 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -393,6 +393,9 @@ struct request_queue { /* Throttle data */ struct throtl_data *td; #endif +#ifdef CONFIG_LOCKDEP + int ioc_release_depth; +#endif }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 2c2b6da96b3..01e86312878 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -3,6 +3,7 @@ #include #include +#include struct cfq_queue; struct cfq_ttime { @@ -33,8 +34,8 @@ struct cfq_io_context { unsigned long changed; - void (*dtor)(struct io_context *); /* destructor */ - void (*exit)(struct io_context *); /* called on task exit */ + void (*exit)(struct cfq_io_context *); + void (*release)(struct cfq_io_context *); struct rcu_head rcu_head; }; @@ -61,6 +62,8 
@@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; void __rcu *ioc_data; + + struct work_struct release_work; }; static inline struct io_context *ioc_task_link(struct io_context *ioc) @@ -79,7 +82,7 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) struct task_struct; #ifdef CONFIG_BLOCK -void put_io_context(struct io_context *ioc); +void put_io_context(struct io_context *ioc, struct request_queue *locked_q); void exit_io_context(struct task_struct *task); struct io_context *get_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node); @@ -87,7 +90,8 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio); void ioc_cgroup_changed(struct io_context *ioc); #else struct io_context; -static inline void put_io_context(struct io_context *ioc) { } +static inline void put_io_context(struct io_context *ioc, + struct request_queue *locked_q) { } static inline void exit_io_context(struct task_struct *task) { } #endif diff --git a/kernel/fork.c b/kernel/fork.c index 5bcfc739bb7..2753449f203 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -887,7 +887,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk) return -ENOMEM; new_ioc->ioprio = ioc->ioprio; - put_io_context(new_ioc); + put_io_context(new_ioc, NULL); } #endif return 0; -- cgit v1.2.3 From 2d3475c0ad625f7a43844a6cd0130d136416e604 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 14 Dec 2011 14:39:56 -0500 Subject: NFSD: forget_delegations should use list_for_each_entry_safe Otherwise the for loop could try to use a file recently removed from the file_hashtbl list and oops. Signed-off-by: Bryan Schumaker Tested-by: Casey Bodley Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 19ca9b54200..7355fe405d6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4505,18 +4505,19 @@ void nfsd_forget_openowners(u64 num) int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) { int i, count = 0; - struct nfs4_file *fp; - struct nfs4_delegation *dp, *next; + struct nfs4_file *fp, *fnext; + struct nfs4_delegation *dp, *dnext; for (i = 0; i < FILE_HASH_SIZE; i++) { - list_for_each_entry(fp, &file_hashtbl[i], fi_hash) { - list_for_each_entry_safe(dp, next, &fp->fi_delegations, dl_perfile) { + list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) { + list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) { deleg_func(dp); if (++count == num) return count; } } } + return count; } -- cgit v1.2.3 From 1bc36b6426ae49139e9f56491db76b95921454d7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 19 Oct 2011 11:44:41 +0200 Subject: writeback: Include all dirty inodes in background writeback Current livelock avoidance code makes background work to include only inodes that were dirtied before background writeback has started. However background writeback can be running for a long time and thus excluding newly dirtied inodes can eventually exclude significant portion of dirty inodes making background writeback inefficient. Since background writeback avoids livelocking the flusher thread by yielding to any other work, there is no real reason why background work should not include all dirty inodes so change the logic in wb_writeback(). 
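The net effect on wb_writeback()'s expiry cutoff can be summarized as follows; this is a condensed paraphrase of the hunk in the diff below, not additional code in the patch:

	if (work->for_kupdate) {
		/* kupdate: only write inodes dirtied before the expire interval */
		oldest_jif = jiffies -
			msecs_to_jiffies(dirty_expire_interval * 10);
	} else if (work->for_background)
		/*
		 * background: include every inode dirtied up to now; livelock
		 * is avoided because background work yields to any other
		 * queued work.
		 */
		oldest_jif = jiffies;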
Signed-off-by: Jan Kara Signed-off-by: Wu Fengguang --- fs/fs-writeback.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 517f211a3bd..92d353e069d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -743,11 +743,17 @@ static long wb_writeback(struct bdi_writeback *wb, if (work->for_background && !over_bground_thresh(wb->bdi)) break; + /* + * Kupdate and background works are special and we want to + * include all inodes that need writing. Livelock avoidance is + * handled by these works yielding to any other work so we are + * safe. + */ if (work->for_kupdate) { oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); - work->older_than_this = &oldest_jif; - } + } else if (work->for_background) + oldest_jif = jiffies; trace_writeback_start(wb->bdi, work); if (list_empty(&wb->b_io)) -- cgit v1.2.3 From 32c7f202a4801252a0f3578807b75a961f792870 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Mon, 8 Aug 2011 15:19:47 -0600 Subject: btrfs: fix dirtied pages accounting on sub-page writes When doing 1KB sequential writes to the same page, balance_dirty_pages_ratelimited_nr() should be called once instead of 4 times, the latter makes the dirtier tasks be throttled much too heavy. Fix it with proper de-accounting on clear_page_dirty_for_io(). CC: Chris Mason Signed-off-by: Wu Fengguang --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 97fbe939c05..bfb620ead29 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1136,7 +1136,8 @@ again: GFP_NOFS); } for (i = 0; i < num_pages; i++) { - clear_page_dirty_for_io(pages[i]); + if (clear_page_dirty_for_io(pages[i])) + account_page_redirty(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); } -- cgit v1.2.3 From 60e07cf515e541ea3e13b888d273c9b19a2ad9dd Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Sun, 18 Dec 2011 15:49:54 -0500 Subject: ext4: do not reference pa_inode from group_pa pa_inode in group_pa is set NULL in ext4_mb_new_group_pa, so pa_inode should be not referenced. 
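The practical consequence is easiest to see in the tracepoint assignment (paraphrasing the hunks in the diff below): for a group preallocation pa->pa_inode is NULL, so deriving the device from it would dereference a NULL pointer; the fix is to hand the superblock to the tracepoint directly:

	/* before: unsafe for group PAs, where pa->pa_inode == NULL */
	__entry->dev = pa->pa_inode->i_sb->s_dev;

	/* after: the caller passes sb explicitly */
	trace_ext4_mb_release_group_pa(sb, pa);
	__entry->dev = sb->s_dev;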
Reported-by: Wu Fengguang Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 2 +- include/trace/events/ext4.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e2d8be8f28b..cb990b21c69 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3671,7 +3671,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, ext4_group_t group; ext4_grpblk_t bit; - trace_ext4_mb_release_group_pa(pa); + trace_ext4_mb_release_group_pa(sb, pa); BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); BUG_ON(group != e4b->bd_group && pa->pa_len != 0); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 748ff7cbe55..319538bf17d 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -573,9 +573,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ); TRACE_EVENT(ext4_mb_release_group_pa, - TP_PROTO(struct ext4_prealloc_space *pa), + TP_PROTO(struct super_block *sb, struct ext4_prealloc_space *pa), - TP_ARGS(pa), + TP_ARGS(sb, pa), TP_STRUCT__entry( __field( dev_t, dev ) @@ -585,7 +585,7 @@ TRACE_EVENT(ext4_mb_release_group_pa, ), TP_fast_assign( - __entry->dev = pa->pa_inode->i_sb->s_dev; + __entry->dev = sb->s_dev; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), -- cgit v1.2.3 From 5635a62b83c04d05e4eb4575a1c3de51a35bacdc Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 18 Dec 2011 16:13:58 -0500 Subject: ext4: add missing space to ext4_msg output in ext4_fill_super() Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3e1329e2f82..35377d57ec4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3508,7 +3508,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * of the filesystem. */ if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { - ext4_msg(sb, KERN_WARNING, "bad geometry: first data" + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " "block %u is beyond end of filesystem (%llu)", le32_to_cpu(es->s_first_data_block), ext4_blocks_count(es)); -- cgit v1.2.3 From acd6ad83517639e8f09a8c5525b1dccd81cd2a10 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 18 Dec 2011 17:37:02 -0500 Subject: ext4: fix error handling on inode bitmap corruption When insert_inode_locked() fails in ext4_new_inode() it most likely means inode bitmap got corrupted and we allocated again inode which is already in use. Also doing unlock_new_inode() during error recovery is wrong since the inode does not have I_NEW set. Fix the problem by jumping to fail: (instead of fail_drop:) which declares filesystem error and does not call unlock_new_inode(). Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 00beb4f9cc4..8fb6844f973 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -885,8 +885,12 @@ got: if (IS_DIRSYNC(inode)) ext4_handle_sync(handle); if (insert_inode_locked(inode) < 0) { - err = -EINVAL; - goto fail_drop; + /* + * Likely a bitmap corruption causing inode to be allocated + * twice. 
+ */ + err = -EIO; + goto fail; } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; -- cgit v1.2.3 From 14d7f3efe923bc60839c65f9818793c64b4d708b Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Sun, 18 Dec 2011 17:39:02 -0500 Subject: ext4: remove unused local variable In get_implied_cluster_alloc(), rr_cluster_end was being defined and set, but was never used. Removed this. Signed-off-by: Curt Wohlgemuth Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 607b1557d29..4423b11476a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3625,7 +3625,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); ext4_lblk_t ex_cluster_start, ex_cluster_end; - ext4_lblk_t rr_cluster_start, rr_cluster_end; + ext4_lblk_t rr_cluster_start; ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); ext4_fsblk_t ee_start = ext4_ext_pblock(ex); unsigned short ee_len = ext4_ext_get_actual_len(ex); @@ -3636,7 +3636,6 @@ static int get_implied_cluster_alloc(struct super_block *sb, /* The requested region passed into ext4_map_blocks() */ rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); - rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1); if ((rr_cluster_start == ex_cluster_end) || (rr_cluster_start == ex_cluster_start)) { -- cgit v1.2.3 From 8c48f7e88e293b9dd422bd8884842aea85d30b22 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sun, 18 Dec 2011 23:05:43 -0500 Subject: ext4: optimize ext4_find_delalloc_range() in nodelalloc mode We found performance regression when using bigalloc with "nodelalloc" (1MB cluster size): 1. mke2fs -C 1048576 -O ^has_journal,bigalloc /dev/sda 2. mount -o nodelalloc /dev/sda /test/ 3. time dd if=/dev/zero of=/test/io bs=1048576 count=1024 The "dd" will cost about 2 seconds to finish, but if we mke2fs without "bigalloc", "dd" will only cost less than 1 second. The reason is: when using ext4 with "nodelalloc", it will call ext4_find_delalloc_cluster() nearly everytime it call ext4_ext_map_blocks(), and ext4_find_delalloc_range() will also scan all pages in cluster because no buffer is "delayed". A cluster has 256 pages (1MB cluster), so it will scan 256 * 256k pags when creating a 1G file. That severely hurts the performance. Therefore, we return immediately from ext4_find_delalloc_range() in nodelalloc mode, since by definition there can't be any delalloc pages. Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4423b11476a..5684f251092 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3281,6 +3281,9 @@ static int ext4_find_delalloc_range(struct inode *inode, ext4_lblk_t i, pg_lblk; pgoff_t index; + if (!test_opt(inode->i_sb, DELALLOC)) + return 0; + /* reverse search wont work if fs block size is less than page size */ if (inode->i_blkbits < PAGE_CACHE_SHIFT) search_hint_reverse = 0; -- cgit v1.2.3 From 5db0276014b80484689eb6c1bf7b94af1c7d5b1a Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Tue, 1 Nov 2011 17:04:16 +0100 Subject: Btrfs: add optional integrity check code The two files added in this patch contain all the code that is required to implement the integrity checks. 
Signed-off-by: Stefan Behrens --- fs/btrfs/check-integrity.c | 3068 ++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/check-integrity.h | 36 + 2 files changed, 3104 insertions(+) create mode 100644 fs/btrfs/check-integrity.c create mode 100644 fs/btrfs/check-integrity.h (limited to 'fs') diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c new file mode 100644 index 00000000000..ad0b3ba735b --- /dev/null +++ b/fs/btrfs/check-integrity.c @@ -0,0 +1,3068 @@ +/* + * Copyright (C) STRATO AG 2011. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +/* + * This module can be used to catch cases when the btrfs kernel + * code executes write requests to the disk that bring the file + * system in an inconsistent state. In such a state, a power-loss + * or kernel panic event would cause that the data on disk is + * lost or at least damaged. + * + * Code is added that examines all block write requests during + * runtime (including writes of the super block). Three rules + * are verified and an error is printed on violation of the + * rules: + * 1. It is not allowed to write a disk block which is + * currently referenced by the super block (either directly + * or indirectly). + * 2. When a super block is written, it is verified that all + * referenced (directly or indirectly) blocks fulfill the + * following requirements: + * 2a. All referenced blocks have either been present when + * the file system was mounted, (i.e., they have been + * referenced by the super block) or they have been + * written since then and the write completion callback + * was called and a FLUSH request to the device where + * these blocks are located was received and completed. + * 2b. All referenced blocks need to have a generation + * number which is equal to the parent's number. + * + * One issue that was found using this module was that the log + * tree on disk became temporarily corrupted because disk blocks + * that had been in use for the log tree had been freed and + * reused too early, while being referenced by the written super + * block. + * + * The search term in the kernel log that can be used to filter + * on the existence of detected integrity issues is + * "btrfs: attempt". + * + * The integrity check is enabled via mount options. These + * mount options are only supported if the integrity check + * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY. 
+ * + * Example #1, apply integrity checks to all metadata: + * mount /dev/sdb1 /mnt -o check_int + * + * Example #2, apply integrity checks to all metadata and + * to data extents: + * mount /dev/sdb1 /mnt -o check_int_data + * + * Example #3, apply integrity checks to all metadata and dump + * the tree that the super block references to kernel messages + * each time after a super block was written: + * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263 + * + * If the integrity check tool is included and activated in + * the mount options, plenty of kernel memory is used, and + * plenty of additional CPU cycles are spent. Enabling this + * functionality is not intended for normal use. In most + * cases, unless you are a btrfs developer who needs to verify + * the integrity of (super)-block write requests, do not + * enable the config option BTRFS_FS_CHECK_INTEGRITY to + * include and compile the integrity check tool. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "extent_io.h" +#include "disk-io.h" +#include "volumes.h" +#include "print-tree.h" +#include "locking.h" +#include "check-integrity.h" + +#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 +#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 +#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100 +#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051 +#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807 +#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530 +#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 +#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, + * excluding " [...]" */ +#define BTRFSIC_BLOCK_SIZE PAGE_SIZE + +#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) + +/* + * The definition of the bitmask fields for the print_mask. + * They are specified with the mount option check_integrity_print_mask. 
+ */ +#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001 +#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002 +#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004 +#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008 +#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010 +#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020 +#define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040 +#define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080 +#define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100 +#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200 +#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 +#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 +#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 + +struct btrfsic_dev_state; +struct btrfsic_state; + +struct btrfsic_block { + u32 magic_num; /* only used for debug purposes */ + unsigned int is_metadata:1; /* if it is meta-data, not data-data */ + unsigned int is_superblock:1; /* if it is one of the superblocks */ + unsigned int is_iodone:1; /* if is done by lower subsystem */ + unsigned int iodone_w_error:1; /* error was indicated to endio */ + unsigned int never_written:1; /* block was added because it was + * referenced, not because it was + * written */ + unsigned int mirror_num:2; /* large enough to hold + * BTRFS_SUPER_MIRROR_MAX */ + struct btrfsic_dev_state *dev_state; + u64 dev_bytenr; /* key, physical byte num on disk */ + u64 logical_bytenr; /* logical byte num on disk */ + u64 generation; + struct btrfs_disk_key disk_key; /* extra info to print in case of + * issues, will not always be correct */ + struct list_head collision_resolving_node; /* list node */ + struct list_head all_blocks_node; /* list node */ + + /* the following two lists contain block_link items */ + struct list_head ref_to_list; /* list */ + struct list_head ref_from_list; /* list */ + struct btrfsic_block *next_in_same_bio; + void *orig_bio_bh_private; + union { + bio_end_io_t *bio; + bh_end_io_t *bh; + } orig_bio_bh_end_io; + int submit_bio_bh_rw; + u64 flush_gen; /* only valid if !never_written */ +}; + +/* + * Elements of this type are allocated dynamically and required because + * each block object can refer to and can be ref from multiple blocks. + * The key to lookup them in the hashtable is the dev_bytenr of + * the block ref to plus the one from the block refered from. + * The fact that they are searchable via a hashtable and that a + * ref_cnt is maintained is not required for the btrfs integrity + * check algorithm itself, it is only used to make the output more + * beautiful in case that an error is detected (an error is defined + * as a write operation to a block while that block is still referenced). 
+ */ +struct btrfsic_block_link { + u32 magic_num; /* only used for debug purposes */ + u32 ref_cnt; + struct list_head node_ref_to; /* list node */ + struct list_head node_ref_from; /* list node */ + struct list_head collision_resolving_node; /* list node */ + struct btrfsic_block *block_ref_to; + struct btrfsic_block *block_ref_from; + u64 parent_generation; +}; + +struct btrfsic_dev_state { + u32 magic_num; /* only used for debug purposes */ + struct block_device *bdev; + struct btrfsic_state *state; + struct list_head collision_resolving_node; /* list node */ + struct btrfsic_block dummy_block_for_bio_bh_flush; + u64 last_flush_gen; + char name[BDEVNAME_SIZE]; +}; + +struct btrfsic_block_hashtable { + struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE]; +}; + +struct btrfsic_block_link_hashtable { + struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE]; +}; + +struct btrfsic_dev_state_hashtable { + struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE]; +}; + +struct btrfsic_block_data_ctx { + u64 start; /* virtual bytenr */ + u64 dev_bytenr; /* physical bytenr on device */ + u32 len; + struct btrfsic_dev_state *dev; + char *data; + struct buffer_head *bh; /* do not use if set to NULL */ +}; + +/* This structure is used to implement recursion without occupying + * any stack space, refer to btrfsic_process_metablock() */ +struct btrfsic_stack_frame { + u32 magic; + u32 nr; + int error; + int i; + int limit_nesting; + int num_copies; + int mirror_num; + struct btrfsic_block *block; + struct btrfsic_block_data_ctx *block_ctx; + struct btrfsic_block *next_block; + struct btrfsic_block_data_ctx next_block_ctx; + struct btrfs_header *hdr; + struct btrfsic_stack_frame *prev; +}; + +/* Some state per mounted filesystem */ +struct btrfsic_state { + u32 print_mask; + int include_extent_data; + int csum_size; + struct list_head all_blocks_list; + struct btrfsic_block_hashtable block_hashtable; + struct btrfsic_block_link_hashtable block_link_hashtable; + struct btrfs_root *root; + u64 max_superblock_generation; + struct btrfsic_block *latest_superblock; +}; + +static void btrfsic_block_init(struct btrfsic_block *b); +static struct btrfsic_block *btrfsic_block_alloc(void); +static void btrfsic_block_free(struct btrfsic_block *b); +static void btrfsic_block_link_init(struct btrfsic_block_link *n); +static struct btrfsic_block_link *btrfsic_block_link_alloc(void); +static void btrfsic_block_link_free(struct btrfsic_block_link *n); +static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds); +static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void); +static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds); +static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h); +static void btrfsic_block_hashtable_add(struct btrfsic_block *b, + struct btrfsic_block_hashtable *h); +static void btrfsic_block_hashtable_remove(struct btrfsic_block *b); +static struct btrfsic_block *btrfsic_block_hashtable_lookup( + struct block_device *bdev, + u64 dev_bytenr, + struct btrfsic_block_hashtable *h); +static void btrfsic_block_link_hashtable_init( + struct btrfsic_block_link_hashtable *h); +static void btrfsic_block_link_hashtable_add( + struct btrfsic_block_link *l, + struct btrfsic_block_link_hashtable *h); +static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l); +static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( + struct block_device *bdev_ref_to, + u64 dev_bytenr_ref_to, + struct block_device *bdev_ref_from, + u64 
dev_bytenr_ref_from, + struct btrfsic_block_link_hashtable *h); +static void btrfsic_dev_state_hashtable_init( + struct btrfsic_dev_state_hashtable *h); +static void btrfsic_dev_state_hashtable_add( + struct btrfsic_dev_state *ds, + struct btrfsic_dev_state_hashtable *h); +static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds); +static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( + struct block_device *bdev, + struct btrfsic_dev_state_hashtable *h); +static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void); +static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf); +static int btrfsic_process_superblock(struct btrfsic_state *state, + struct btrfs_fs_devices *fs_devices); +static int btrfsic_process_metablock(struct btrfsic_state *state, + struct btrfsic_block *block, + struct btrfsic_block_data_ctx *block_ctx, + struct btrfs_header *hdr, + int limit_nesting, int force_iodone_flag); +static int btrfsic_create_link_to_next_block( + struct btrfsic_state *state, + struct btrfsic_block *block, + struct btrfsic_block_data_ctx + *block_ctx, u64 next_bytenr, + int limit_nesting, + struct btrfsic_block_data_ctx *next_block_ctx, + struct btrfsic_block **next_blockp, + int force_iodone_flag, + int *num_copiesp, int *mirror_nump, + struct btrfs_disk_key *disk_key, + u64 parent_generation); +static int btrfsic_handle_extent_data(struct btrfsic_state *state, + struct btrfsic_block *block, + struct btrfsic_block_data_ctx *block_ctx, + u32 item_offset, int force_iodone_flag); +static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, + struct btrfsic_block_data_ctx *block_ctx_out, + int mirror_num); +static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, + u32 len, struct block_device *bdev, + struct btrfsic_block_data_ctx *block_ctx_out); +static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); +static int btrfsic_read_block(struct btrfsic_state *state, + struct btrfsic_block_data_ctx *block_ctx); +static void btrfsic_dump_database(struct btrfsic_state *state); +static int btrfsic_test_for_metadata(struct btrfsic_state *state, + const u8 *data, unsigned int size); +static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, + u64 dev_bytenr, u8 *mapped_data, + unsigned int len, struct bio *bio, + int *bio_is_patched, + struct buffer_head *bh, + int submit_bio_bh_rw); +static int btrfsic_process_written_superblock( + struct btrfsic_state *state, + struct btrfsic_block *const block, + struct btrfs_super_block *const super_hdr); +static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status); +static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate); +static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state, + const struct btrfsic_block *block, + int recursion_level); +static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, + struct btrfsic_block *const block, + int recursion_level); +static void btrfsic_print_add_link(const struct btrfsic_state *state, + const struct btrfsic_block_link *l); +static void btrfsic_print_rem_link(const struct btrfsic_state *state, + const struct btrfsic_block_link *l); +static char btrfsic_get_block_type(const struct btrfsic_state *state, + const struct btrfsic_block *block); +static void btrfsic_dump_tree(const struct btrfsic_state *state); +static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, + const struct btrfsic_block *block, + int indent_level); +static struct 
btrfsic_block_link *btrfsic_block_link_lookup_or_add( + struct btrfsic_state *state, + struct btrfsic_block_data_ctx *next_block_ctx, + struct btrfsic_block *next_block, + struct btrfsic_block *from_block, + u64 parent_generation); +static struct btrfsic_block *btrfsic_block_lookup_or_add( + struct btrfsic_state *state, + struct btrfsic_block_data_ctx *block_ctx, + const char *additional_string, + int is_metadata, + int is_iodone, + int never_written, + int mirror_num, + int *was_created); +static int btrfsic_process_superblock_dev_mirror( + struct btrfsic_state *state, + struct btrfsic_dev_state *dev_state, + struct btrfs_device *device, + int superblock_mirror_num, + struct btrfsic_dev_state **selected_dev_state, + struct btrfs_super_block *selected_super); +static struct btrfsic_dev_state *btrfsic_dev_state_lookup( + struct block_device *bdev); +static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, + u64 bytenr, + struct btrfsic_dev_state *dev_state, + u64 dev_bytenr, char *data); + +static struct mutex btrfsic_mutex; +static int btrfsic_is_initialized; +static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable; + + +static void btrfsic_block_init(struct btrfsic_block *b) +{ + b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER; + b->dev_state = NULL; + b->dev_bytenr = 0; + b->logical_bytenr = 0; + b->generation = BTRFSIC_GENERATION_UNKNOWN; + b->disk_key.objectid = 0; + b->disk_key.type = 0; + b->disk_key.offset = 0; + b->is_metadata = 0; + b->is_superblock = 0; + b->is_iodone = 0; + b->iodone_w_error = 0; + b->never_written = 0; + b->mirror_num = 0; + b->next_in_same_bio = NULL; + b->orig_bio_bh_private = NULL; + b->orig_bio_bh_end_io.bio = NULL; + INIT_LIST_HEAD(&b->collision_resolving_node); + INIT_LIST_HEAD(&b->all_blocks_node); + INIT_LIST_HEAD(&b->ref_to_list); + INIT_LIST_HEAD(&b->ref_from_list); + b->submit_bio_bh_rw = 0; + b->flush_gen = 0; +} + +static struct btrfsic_block *btrfsic_block_alloc(void) +{ + struct btrfsic_block *b; + + b = kzalloc(sizeof(*b), GFP_NOFS); + if (NULL != b) + btrfsic_block_init(b); + + return b; +} + +static void btrfsic_block_free(struct btrfsic_block *b) +{ + BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num)); + kfree(b); +} + +static void btrfsic_block_link_init(struct btrfsic_block_link *l) +{ + l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER; + l->ref_cnt = 1; + INIT_LIST_HEAD(&l->node_ref_to); + INIT_LIST_HEAD(&l->node_ref_from); + INIT_LIST_HEAD(&l->collision_resolving_node); + l->block_ref_to = NULL; + l->block_ref_from = NULL; +} + +static struct btrfsic_block_link *btrfsic_block_link_alloc(void) +{ + struct btrfsic_block_link *l; + + l = kzalloc(sizeof(*l), GFP_NOFS); + if (NULL != l) + btrfsic_block_link_init(l); + + return l; +} + +static void btrfsic_block_link_free(struct btrfsic_block_link *l) +{ + BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num)); + kfree(l); +} + +static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds) +{ + ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER; + ds->bdev = NULL; + ds->state = NULL; + ds->name[0] = '\0'; + INIT_LIST_HEAD(&ds->collision_resolving_node); + ds->last_flush_gen = 0; + btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush); + ds->dummy_block_for_bio_bh_flush.is_iodone = 1; + ds->dummy_block_for_bio_bh_flush.dev_state = ds; +} + +static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void) +{ + struct btrfsic_dev_state *ds; + + ds = kzalloc(sizeof(*ds), GFP_NOFS); + if (NULL != ds) + btrfsic_dev_state_init(ds); + + return ds; +} + 
+static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds) +{ + BUG_ON(!(NULL == ds || + BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num)); + kfree(ds); +} + +static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h) +{ + int i; + + for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++) + INIT_LIST_HEAD(h->table + i); +} + +static void btrfsic_block_hashtable_add(struct btrfsic_block *b, + struct btrfsic_block_hashtable *h) +{ + const unsigned int hashval = + (((unsigned int)(b->dev_bytenr >> 16)) ^ + ((unsigned int)((uintptr_t)b->dev_state->bdev))) & + (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); + + list_add(&b->collision_resolving_node, h->table + hashval); +} + +static void btrfsic_block_hashtable_remove(struct btrfsic_block *b) +{ + list_del(&b->collision_resolving_node); +} + +static struct btrfsic_block *btrfsic_block_hashtable_lookup( + struct block_device *bdev, + u64 dev_bytenr, + struct btrfsic_block_hashtable *h) +{ + const unsigned int hashval = + (((unsigned int)(dev_bytenr >> 16)) ^ + ((unsigned int)((uintptr_t)bdev))) & + (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); + struct list_head *elem; + + list_for_each(elem, h->table + hashval) { + struct btrfsic_block *const b = + list_entry(elem, struct btrfsic_block, + collision_resolving_node); + + if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr) + return b; + } + + return NULL; +} + +static void btrfsic_block_link_hashtable_init( + struct btrfsic_block_link_hashtable *h) +{ + int i; + + for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++) + INIT_LIST_HEAD(h->table + i); +} + +static void btrfsic_block_link_hashtable_add( + struct btrfsic_block_link *l, + struct btrfsic_block_link_hashtable *h) +{ + const unsigned int hashval = + (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^ + ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^ + ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^ + ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev))) + & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); + + BUG_ON(NULL == l->block_ref_to); + BUG_ON(NULL == l->block_ref_from); + list_add(&l->collision_resolving_node, h->table + hashval); +} + +static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l) +{ + list_del(&l->collision_resolving_node); +} + +static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( + struct block_device *bdev_ref_to, + u64 dev_bytenr_ref_to, + struct block_device *bdev_ref_from, + u64 dev_bytenr_ref_from, + struct btrfsic_block_link_hashtable *h) +{ + const unsigned int hashval = + (((unsigned int)(dev_bytenr_ref_to >> 16)) ^ + ((unsigned int)(dev_bytenr_ref_from >> 16)) ^ + ((unsigned int)((uintptr_t)bdev_ref_to)) ^ + ((unsigned int)((uintptr_t)bdev_ref_from))) & + (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); + struct list_head *elem; + + list_for_each(elem, h->table + hashval) { + struct btrfsic_block_link *const l = + list_entry(elem, struct btrfsic_block_link, + collision_resolving_node); + + BUG_ON(NULL == l->block_ref_to); + BUG_ON(NULL == l->block_ref_from); + if (l->block_ref_to->dev_state->bdev == bdev_ref_to && + l->block_ref_to->dev_bytenr == dev_bytenr_ref_to && + l->block_ref_from->dev_state->bdev == bdev_ref_from && + l->block_ref_from->dev_bytenr == dev_bytenr_ref_from) + return l; + } + + return NULL; +} + +static void btrfsic_dev_state_hashtable_init( + struct btrfsic_dev_state_hashtable *h) +{ + int i; + + for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++) + INIT_LIST_HEAD(h->table + i); +} + +static void 
btrfsic_dev_state_hashtable_add( + struct btrfsic_dev_state *ds, + struct btrfsic_dev_state_hashtable *h) +{ + const unsigned int hashval = + (((unsigned int)((uintptr_t)ds->bdev)) & + (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); + + list_add(&ds->collision_resolving_node, h->table + hashval); +} + +static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds) +{ + list_del(&ds->collision_resolving_node); +} + +static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( + struct block_device *bdev, + struct btrfsic_dev_state_hashtable *h) +{ + const unsigned int hashval = + (((unsigned int)((uintptr_t)bdev)) & + (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); + struct list_head *elem; + + list_for_each(elem, h->table + hashval) { + struct btrfsic_dev_state *const ds = + list_entry(elem, struct btrfsic_dev_state, + collision_resolving_node); + + if (ds->bdev == bdev) + return ds; + } + + return NULL; +} + +static int btrfsic_process_superblock(struct btrfsic_state *state, + struct btrfs_fs_devices *fs_devices) +{ + int ret; + struct btrfs_super_block *selected_super; + struct list_head *dev_head = &fs_devices->devices; + struct btrfs_device *device; + struct btrfsic_dev_state *selected_dev_state = NULL; + int pass; + + BUG_ON(NULL == state); + selected_super = kmalloc(sizeof(*selected_super), GFP_NOFS); + if (NULL == selected_super) { + printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); + return -1; + } + + list_for_each_entry(device, dev_head, dev_list) { + int i; + struct btrfsic_dev_state *dev_state; + + if (!device->bdev || !device->name) + continue; + + dev_state = btrfsic_dev_state_lookup(device->bdev); + BUG_ON(NULL == dev_state); + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + ret = btrfsic_process_superblock_dev_mirror( + state, dev_state, device, i, + &selected_dev_state, selected_super); + if (0 != ret && 0 == i) { + kfree(selected_super); + return ret; + } + } + } + + if (NULL == state->latest_superblock) { + printk(KERN_INFO "btrfsic: no superblock found!\n"); + kfree(selected_super); + return -1; + } + + state->csum_size = btrfs_super_csum_size(selected_super); + + for (pass = 0; pass < 3; pass++) { + int num_copies; + int mirror_num; + u64 next_bytenr; + + switch (pass) { + case 0: + next_bytenr = btrfs_super_root(selected_super); + if (state->print_mask & + BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) + printk(KERN_INFO "root@%llu\n", + (unsigned long long)next_bytenr); + break; + case 1: + next_bytenr = btrfs_super_chunk_root(selected_super); + if (state->print_mask & + BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) + printk(KERN_INFO "chunk@%llu\n", + (unsigned long long)next_bytenr); + break; + case 2: + next_bytenr = btrfs_super_log_root(selected_super); + if (0 == next_bytenr) + continue; + if (state->print_mask & + BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) + printk(KERN_INFO "log@%llu\n", + (unsigned long long)next_bytenr); + break; + } + + num_copies = + btrfs_num_copies(&state->root->fs_info->mapping_tree, + next_bytenr, PAGE_SIZE); + if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) + printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", + (unsigned long long)next_bytenr, num_copies); + + for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { + struct btrfsic_block *next_block; + struct btrfsic_block_data_ctx tmp_next_block_ctx; + struct btrfsic_block_link *l; + struct btrfs_header *hdr; + + ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, + &tmp_next_block_ctx, + mirror_num); + if (ret) { + printk(KERN_INFO 
"btrfsic:" + " btrfsic_map_block(root @%llu," + " mirror %d) failed!\n", + (unsigned long long)next_bytenr, + mirror_num); + kfree(selected_super); + return -1; + } + + next_block = btrfsic_block_hashtable_lookup( + tmp_next_block_ctx.dev->bdev, + tmp_next_block_ctx.dev_bytenr, + &state->block_hashtable); + BUG_ON(NULL == next_block); + + l = btrfsic_block_link_hashtable_lookup( + tmp_next_block_ctx.dev->bdev, + tmp_next_block_ctx.dev_bytenr, + state->latest_superblock->dev_state-> + bdev, + state->latest_superblock->dev_bytenr, + &state->block_link_hashtable); + BUG_ON(NULL == l); + + ret = btrfsic_read_block(state, &tmp_next_block_ctx); + if (ret < (int)BTRFSIC_BLOCK_SIZE) { + printk(KERN_INFO + "btrfsic: read @logical %llu failed!\n", + (unsigned long long) + tmp_next_block_ctx.start); + btrfsic_release_block_ctx(&tmp_next_block_ctx); + kfree(selected_super); + return -1; + } + + hdr = (struct btrfs_header *)tmp_next_block_ctx.data; + ret = btrfsic_process_metablock(state, + next_block, + &tmp_next_block_ctx, + hdr, + BTRFS_MAX_LEVEL + 3, 1); + btrfsic_release_block_ctx(&tmp_next_block_ctx); + } + } + + kfree(selected_super); + return ret; +} + +static int btrfsic_process_superblock_dev_mirror( + struct btrfsic_state *state, + struct btrfsic_dev_state *dev_state, + struct btrfs_device *device, + int superblock_mirror_num, + struct btrfsic_dev_state **selected_dev_state, + struct btrfs_super_block *selected_super) +{ + struct btrfs_super_block *super_tmp; + u64 dev_bytenr; + struct buffer_head *bh; + struct btrfsic_block *superblock_tmp; + int pass; + struct block_device *const superblock_bdev = device->bdev; + + /* super block bytenr is always the unmapped device bytenr */ + dev_bytenr = btrfs_sb_offset(superblock_mirror_num); + bh = __bread(superblock_bdev, dev_bytenr / 4096, 4096); + if (NULL == bh) + return -1; + super_tmp = (struct btrfs_super_block *) + (bh->b_data + (dev_bytenr & 4095)); + + if (btrfs_super_bytenr(super_tmp) != dev_bytenr || + strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, + sizeof(super_tmp->magic)) || + memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE)) { + brelse(bh); + return 0; + } + + superblock_tmp = + btrfsic_block_hashtable_lookup(superblock_bdev, + dev_bytenr, + &state->block_hashtable); + if (NULL == superblock_tmp) { + superblock_tmp = btrfsic_block_alloc(); + if (NULL == superblock_tmp) { + printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); + brelse(bh); + return -1; + } + /* for superblock, only the dev_bytenr makes sense */ + superblock_tmp->dev_bytenr = dev_bytenr; + superblock_tmp->dev_state = dev_state; + superblock_tmp->logical_bytenr = dev_bytenr; + superblock_tmp->generation = btrfs_super_generation(super_tmp); + superblock_tmp->is_metadata = 1; + superblock_tmp->is_superblock = 1; + superblock_tmp->is_iodone = 1; + superblock_tmp->never_written = 0; + superblock_tmp->mirror_num = 1 + superblock_mirror_num; + if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) + printk(KERN_INFO "New initial S-block (bdev %p, %s)" + " @%llu (%s/%llu/%d)\n", + superblock_bdev, device->name, + (unsigned long long)dev_bytenr, + dev_state->name, + (unsigned long long)dev_bytenr, + superblock_mirror_num); + list_add(&superblock_tmp->all_blocks_node, + &state->all_blocks_list); + btrfsic_block_hashtable_add(superblock_tmp, + &state->block_hashtable); + } + + /* select the one with the highest generation field */ + if (btrfs_super_generation(super_tmp) > + state->max_superblock_generation || + 0 == state->max_superblock_generation) { + 
memcpy(selected_super, super_tmp, sizeof(*selected_super)); + *selected_dev_state = dev_state; + state->max_superblock_generation = + btrfs_super_generation(super_tmp); + state->latest_superblock = superblock_tmp; + } + + for (pass = 0; pass < 3; pass++) { + u64 next_bytenr; + int num_copies; + int mirror_num; + const char *additional_string = NULL; + struct btrfs_disk_key tmp_disk_key; + + tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; + tmp_disk_key.offset = 0; + switch (pass) { + case 0: + tmp_disk_key.objectid = + cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); + additional_string = "initial root "; + next_bytenr = btrfs_super_root(super_tmp); + break; + case 1: + tmp_disk_key.objectid = + cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); + additional_string = "initial chunk "; + next_bytenr = btrfs_super_chunk_root(super_tmp); + break; + case 2: + tmp_disk_key.objectid = + cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); + additional_string = "initial log "; + next_bytenr = btrfs_super_log_root(super_tmp); + if (0 == next_bytenr) + continue; + break; + } + + num_copies = + btrfs_num_copies(&state->root->fs_info->mapping_tree, + next_bytenr, PAGE_SIZE); + if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) + printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", + (unsigned long long)next_bytenr, num_copies); + for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { + struct btrfsic_block *next_block; + struct btrfsic_block_data_ctx tmp_next_block_ctx; + struct btrfsic_block_link *l; + + if (btrfsic_map_block(state, next_bytenr, PAGE_SIZE, + &tmp_next_block_ctx, + mirror_num)) { + printk(KERN_INFO "btrfsic: btrfsic_map_block(" + "bytenr @%llu, mirror %d) failed!\n", + (unsigned long long)next_bytenr, + mirror_num); + brelse(bh); + return -1; + } + + next_block = btrfsic_block_lookup_or_add( + state, &tmp_next_block_ctx, + additional_string, 1, 1, 0, + mirror_num, NULL); + if (NULL == next_block) { + btrfsic_release_block_ctx(&tmp_next_block_ctx); + brelse(bh); + return -1; + } + + next_block->disk_key = tmp_disk_key; + next_block->generation = BTRFSIC_GENERATION_UNKNOWN; + l = btrfsic_block_link_lookup_or_add( + state, &tmp_next_block_ctx, + next_block, superblock_tmp, + BTRFSIC_GENERATION_UNKNOWN); + btrfsic_release_block_ctx(&tmp_next_block_ctx); + if (NULL == l) { + brelse(bh); + return -1; + } + } + } + if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES) + btrfsic_dump_tree_sub(state, superblock_tmp, 0); + + brelse(bh); + return 0; +} + +static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void) +{ + struct btrfsic_stack_frame *sf; + + sf = kzalloc(sizeof(*sf), GFP_NOFS); + if (NULL == sf) + printk(KERN_INFO "btrfsic: alloc memory failed!\n"); + else + sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER; + return sf; +} + +static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf) +{ + BUG_ON(!(NULL == sf || + BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic)); + kfree(sf); +} + +static int btrfsic_process_metablock( + struct btrfsic_state *state, + struct btrfsic_block *const first_block, + struct btrfsic_block_data_ctx *const first_block_ctx, + struct btrfs_header *const first_hdr, + int first_limit_nesting, int force_iodone_flag) +{ + struct btrfsic_stack_frame initial_stack_frame = { 0 }; + struct btrfsic_stack_frame *sf; + struct btrfsic_stack_frame *next_stack; + + sf = &initial_stack_frame; + sf->error = 0; + sf->i = -1; + sf->limit_nesting = first_limit_nesting; + sf->block = first_block; + sf->block_ctx = first_block_ctx; + sf->next_block = NULL; + sf->hdr = first_hdr; 
+ sf->prev = NULL; + +continue_with_new_stack_frame: + sf->block->generation = le64_to_cpu(sf->hdr->generation); + if (0 == sf->hdr->level) { + struct btrfs_leaf *const leafhdr = + (struct btrfs_leaf *)sf->hdr; + + if (-1 == sf->i) { + sf->nr = le32_to_cpu(leafhdr->header.nritems); + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "leaf %llu items %d generation %llu" + " owner %llu\n", + (unsigned long long) + sf->block_ctx->start, + sf->nr, + (unsigned long long) + le64_to_cpu(leafhdr->header.generation), + (unsigned long long) + le64_to_cpu(leafhdr->header.owner)); + } + +continue_with_current_leaf_stack_frame: + if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { + sf->i++; + sf->num_copies = 0; + } + + if (sf->i < sf->nr) { + struct btrfs_item *disk_item = leafhdr->items + sf->i; + struct btrfs_disk_key *disk_key = &disk_item->key; + u8 type; + const u32 item_offset = le32_to_cpu(disk_item->offset); + + type = disk_key->type; + + if (BTRFS_ROOT_ITEM_KEY == type) { + const struct btrfs_root_item *const root_item = + (struct btrfs_root_item *) + (sf->block_ctx->data + + offsetof(struct btrfs_leaf, items) + + item_offset); + const u64 next_bytenr = + le64_to_cpu(root_item->bytenr); + + sf->error = + btrfsic_create_link_to_next_block( + state, + sf->block, + sf->block_ctx, + next_bytenr, + sf->limit_nesting, + &sf->next_block_ctx, + &sf->next_block, + force_iodone_flag, + &sf->num_copies, + &sf->mirror_num, + disk_key, + le64_to_cpu(root_item-> + generation)); + if (sf->error) + goto one_stack_frame_backwards; + + if (NULL != sf->next_block) { + struct btrfs_header *const next_hdr = + (struct btrfs_header *) + sf->next_block_ctx.data; + + next_stack = + btrfsic_stack_frame_alloc(); + if (NULL == next_stack) { + btrfsic_release_block_ctx( + &sf-> + next_block_ctx); + goto one_stack_frame_backwards; + } + + next_stack->i = -1; + next_stack->block = sf->next_block; + next_stack->block_ctx = + &sf->next_block_ctx; + next_stack->next_block = NULL; + next_stack->hdr = next_hdr; + next_stack->limit_nesting = + sf->limit_nesting - 1; + next_stack->prev = sf; + sf = next_stack; + goto continue_with_new_stack_frame; + } + } else if (BTRFS_EXTENT_DATA_KEY == type && + state->include_extent_data) { + sf->error = btrfsic_handle_extent_data( + state, + sf->block, + sf->block_ctx, + item_offset, + force_iodone_flag); + if (sf->error) + goto one_stack_frame_backwards; + } + + goto continue_with_current_leaf_stack_frame; + } + } else { + struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr; + + if (-1 == sf->i) { + sf->nr = le32_to_cpu(nodehdr->header.nritems); + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO "node %llu level %d items %d" + " generation %llu owner %llu\n", + (unsigned long long) + sf->block_ctx->start, + nodehdr->header.level, sf->nr, + (unsigned long long) + le64_to_cpu(nodehdr->header.generation), + (unsigned long long) + le64_to_cpu(nodehdr->header.owner)); + } + +continue_with_current_node_stack_frame: + if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { + sf->i++; + sf->num_copies = 0; + } + + if (sf->i < sf->nr) { + struct btrfs_key_ptr *disk_key_ptr = + nodehdr->ptrs + sf->i; + const u64 next_bytenr = + le64_to_cpu(disk_key_ptr->blockptr); + + sf->error = btrfsic_create_link_to_next_block( + state, + sf->block, + sf->block_ctx, + next_bytenr, + sf->limit_nesting, + &sf->next_block_ctx, + &sf->next_block, + force_iodone_flag, + &sf->num_copies, + &sf->mirror_num, + &disk_key_ptr->key, + 
le64_to_cpu(disk_key_ptr->generation)); + if (sf->error) + goto one_stack_frame_backwards; + + if (NULL != sf->next_block) { + struct btrfs_header *const next_hdr = + (struct btrfs_header *) + sf->next_block_ctx.data; + + next_stack = btrfsic_stack_frame_alloc(); + if (NULL == next_stack) + goto one_stack_frame_backwards; + + next_stack->i = -1; + next_stack->block = sf->next_block; + next_stack->block_ctx = &sf->next_block_ctx; + next_stack->next_block = NULL; + next_stack->hdr = next_hdr; + next_stack->limit_nesting = + sf->limit_nesting - 1; + next_stack->prev = sf; + sf = next_stack; + goto continue_with_new_stack_frame; + } + + goto continue_with_current_node_stack_frame; + } + } + +one_stack_frame_backwards: + if (NULL != sf->prev) { + struct btrfsic_stack_frame *const prev = sf->prev; + + /* the one for the initial block is freed in the caller */ + btrfsic_release_block_ctx(sf->block_ctx); + + if (sf->error) { + prev->error = sf->error; + btrfsic_stack_frame_free(sf); + sf = prev; + goto one_stack_frame_backwards; + } + + btrfsic_stack_frame_free(sf); + sf = prev; + goto continue_with_new_stack_frame; + } else { + BUG_ON(&initial_stack_frame != sf); + } + + return sf->error; +} + +static int btrfsic_create_link_to_next_block( + struct btrfsic_state *state, + struct btrfsic_block *block, + struct btrfsic_block_data_ctx *block_ctx, + u64 next_bytenr, + int limit_nesting, + struct btrfsic_block_data_ctx *next_block_ctx, + struct btrfsic_block **next_blockp, + int force_iodone_flag, + int *num_copiesp, int *mirror_nump, + struct btrfs_disk_key *disk_key, + u64 parent_generation) +{ + struct btrfsic_block *next_block = NULL; + int ret; + struct btrfsic_block_link *l; + int did_alloc_block_link; + int block_was_created; + + *next_blockp = NULL; + if (0 == *num_copiesp) { + *num_copiesp = + btrfs_num_copies(&state->root->fs_info->mapping_tree, + next_bytenr, PAGE_SIZE); + if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) + printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", + (unsigned long long)next_bytenr, *num_copiesp); + *mirror_nump = 1; + } + + if (*mirror_nump > *num_copiesp) + return 0; + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "btrfsic_create_link_to_next_block(mirror_num=%d)\n", + *mirror_nump); + ret = btrfsic_map_block(state, next_bytenr, + BTRFSIC_BLOCK_SIZE, + next_block_ctx, *mirror_nump); + if (ret) { + printk(KERN_INFO + "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", + (unsigned long long)next_bytenr, *mirror_nump); + btrfsic_release_block_ctx(next_block_ctx); + *next_blockp = NULL; + return -1; + } + + next_block = btrfsic_block_lookup_or_add(state, + next_block_ctx, "referenced ", + 1, force_iodone_flag, + !force_iodone_flag, + *mirror_nump, + &block_was_created); + if (NULL == next_block) { + btrfsic_release_block_ctx(next_block_ctx); + *next_blockp = NULL; + return -1; + } + if (block_was_created) { + l = NULL; + next_block->generation = BTRFSIC_GENERATION_UNKNOWN; + } else { + if (next_block->logical_bytenr != next_bytenr && + !(!next_block->is_metadata && + 0 == next_block->logical_bytenr)) { + printk(KERN_INFO + "Referenced block @%llu (%s/%llu/%d)" + " found in hash table, %c," + " bytenr mismatch (!= stored %llu).\n", + (unsigned long long)next_bytenr, + next_block_ctx->dev->name, + (unsigned long long)next_block_ctx->dev_bytenr, + *mirror_nump, + btrfsic_get_block_type(state, next_block), + (unsigned long long)next_block->logical_bytenr); + } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + 
printk(KERN_INFO + "Referenced block @%llu (%s/%llu/%d)" + " found in hash table, %c.\n", + (unsigned long long)next_bytenr, + next_block_ctx->dev->name, + (unsigned long long)next_block_ctx->dev_bytenr, + *mirror_nump, + btrfsic_get_block_type(state, next_block)); + next_block->logical_bytenr = next_bytenr; + + next_block->mirror_num = *mirror_nump; + l = btrfsic_block_link_hashtable_lookup( + next_block_ctx->dev->bdev, + next_block_ctx->dev_bytenr, + block_ctx->dev->bdev, + block_ctx->dev_bytenr, + &state->block_link_hashtable); + } + + next_block->disk_key = *disk_key; + if (NULL == l) { + l = btrfsic_block_link_alloc(); + if (NULL == l) { + printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); + btrfsic_release_block_ctx(next_block_ctx); + *next_blockp = NULL; + return -1; + } + + did_alloc_block_link = 1; + l->block_ref_to = next_block; + l->block_ref_from = block; + l->ref_cnt = 1; + l->parent_generation = parent_generation; + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + btrfsic_print_add_link(state, l); + + list_add(&l->node_ref_to, &block->ref_to_list); + list_add(&l->node_ref_from, &next_block->ref_from_list); + + btrfsic_block_link_hashtable_add(l, + &state->block_link_hashtable); + } else { + did_alloc_block_link = 0; + if (0 == limit_nesting) { + l->ref_cnt++; + l->parent_generation = parent_generation; + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + btrfsic_print_add_link(state, l); + } + } + + if (limit_nesting > 0 && did_alloc_block_link) { + ret = btrfsic_read_block(state, next_block_ctx); + if (ret < (int)BTRFSIC_BLOCK_SIZE) { + printk(KERN_INFO + "btrfsic: read block @logical %llu failed!\n", + (unsigned long long)next_bytenr); + btrfsic_release_block_ctx(next_block_ctx); + *next_blockp = NULL; + return -1; + } + + *next_blockp = next_block; + } else { + *next_blockp = NULL; + } + (*mirror_nump)++; + + return 0; +} + +static int btrfsic_handle_extent_data( + struct btrfsic_state *state, + struct btrfsic_block *block, + struct btrfsic_block_data_ctx *block_ctx, + u32 item_offset, int force_iodone_flag) +{ + int ret; + struct btrfs_file_extent_item *file_extent_item = + (struct btrfs_file_extent_item *)(block_ctx->data + + offsetof(struct btrfs_leaf, + items) + item_offset); + u64 next_bytenr = + le64_to_cpu(file_extent_item->disk_bytenr) + + le64_to_cpu(file_extent_item->offset); + u64 num_bytes = le64_to_cpu(file_extent_item->num_bytes); + u64 generation = le64_to_cpu(file_extent_item->generation); + struct btrfsic_block_link *l; + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) + printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," + " offset = %llu, num_bytes = %llu\n", + file_extent_item->type, + (unsigned long long) + le64_to_cpu(file_extent_item->disk_bytenr), + (unsigned long long) + le64_to_cpu(file_extent_item->offset), + (unsigned long long) + le64_to_cpu(file_extent_item->num_bytes)); + if (BTRFS_FILE_EXTENT_REG != file_extent_item->type || + ((u64)0) == le64_to_cpu(file_extent_item->disk_bytenr)) + return 0; + while (num_bytes > 0) { + u32 chunk_len; + int num_copies; + int mirror_num; + + if (num_bytes > BTRFSIC_BLOCK_SIZE) + chunk_len = BTRFSIC_BLOCK_SIZE; + else + chunk_len = num_bytes; + + num_copies = + btrfs_num_copies(&state->root->fs_info->mapping_tree, + next_bytenr, PAGE_SIZE); + if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) + printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", + (unsigned long long)next_bytenr, num_copies); + for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { + struct 
btrfsic_block_data_ctx next_block_ctx; + struct btrfsic_block *next_block; + int block_was_created; + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO "btrfsic_handle_extent_data(" + "mirror_num=%d)\n", mirror_num); + if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) + printk(KERN_INFO + "\tdisk_bytenr = %llu, num_bytes %u\n", + (unsigned long long)next_bytenr, + chunk_len); + ret = btrfsic_map_block(state, next_bytenr, + chunk_len, &next_block_ctx, + mirror_num); + if (ret) { + printk(KERN_INFO + "btrfsic: btrfsic_map_block(@%llu," + " mirror=%d) failed!\n", + (unsigned long long)next_bytenr, + mirror_num); + return -1; + } + + next_block = btrfsic_block_lookup_or_add( + state, + &next_block_ctx, + "referenced ", + 0, + force_iodone_flag, + !force_iodone_flag, + mirror_num, + &block_was_created); + if (NULL == next_block) { + printk(KERN_INFO + "btrfsic: error, kmalloc failed!\n"); + btrfsic_release_block_ctx(&next_block_ctx); + return -1; + } + if (!block_was_created) { + if (next_block->logical_bytenr != next_bytenr && + !(!next_block->is_metadata && + 0 == next_block->logical_bytenr)) { + printk(KERN_INFO + "Referenced block" + " @%llu (%s/%llu/%d)" + " found in hash table, D," + " bytenr mismatch" + " (!= stored %llu).\n", + (unsigned long long)next_bytenr, + next_block_ctx.dev->name, + (unsigned long long) + next_block_ctx.dev_bytenr, + mirror_num, + (unsigned long long) + next_block->logical_bytenr); + } + next_block->logical_bytenr = next_bytenr; + next_block->mirror_num = mirror_num; + } + + l = btrfsic_block_link_lookup_or_add(state, + &next_block_ctx, + next_block, block, + generation); + btrfsic_release_block_ctx(&next_block_ctx); + if (NULL == l) + return -1; + } + + next_bytenr += chunk_len; + num_bytes -= chunk_len; + } + + return 0; +} + +static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, + struct btrfsic_block_data_ctx *block_ctx_out, + int mirror_num) +{ + int ret; + u64 length; + struct btrfs_bio *multi = NULL; + struct btrfs_device *device; + + length = len; + ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ, + bytenr, &length, &multi, mirror_num); + + device = multi->stripes[0].dev; + block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev); + block_ctx_out->dev_bytenr = multi->stripes[0].physical; + block_ctx_out->start = bytenr; + block_ctx_out->len = len; + block_ctx_out->data = NULL; + block_ctx_out->bh = NULL; + + if (0 == ret) + kfree(multi); + if (NULL == block_ctx_out->dev) { + ret = -ENXIO; + printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n"); + } + + return ret; +} + +static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, + u32 len, struct block_device *bdev, + struct btrfsic_block_data_ctx *block_ctx_out) +{ + block_ctx_out->dev = btrfsic_dev_state_lookup(bdev); + block_ctx_out->dev_bytenr = bytenr; + block_ctx_out->start = bytenr; + block_ctx_out->len = len; + block_ctx_out->data = NULL; + block_ctx_out->bh = NULL; + if (NULL != block_ctx_out->dev) { + return 0; + } else { + printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n"); + return -ENXIO; + } +} + +static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) +{ + if (NULL != block_ctx->bh) { + brelse(block_ctx->bh); + block_ctx->bh = NULL; + } +} + +static int btrfsic_read_block(struct btrfsic_state *state, + struct btrfsic_block_data_ctx *block_ctx) +{ + block_ctx->bh = NULL; + if (block_ctx->dev_bytenr & 4095) { + printk(KERN_INFO + "btrfsic: read_block() with 
unaligned bytenr %llu\n", + (unsigned long long)block_ctx->dev_bytenr); + return -1; + } + if (block_ctx->len > 4096) { + printk(KERN_INFO + "btrfsic: read_block() with too huge size %d\n", + block_ctx->len); + return -1; + } + + block_ctx->bh = __bread(block_ctx->dev->bdev, + block_ctx->dev_bytenr >> 12, 4096); + if (NULL == block_ctx->bh) + return -1; + block_ctx->data = block_ctx->bh->b_data; + + return block_ctx->len; +} + +static void btrfsic_dump_database(struct btrfsic_state *state) +{ + struct list_head *elem_all; + + BUG_ON(NULL == state); + + printk(KERN_INFO "all_blocks_list:\n"); + list_for_each(elem_all, &state->all_blocks_list) { + const struct btrfsic_block *const b_all = + list_entry(elem_all, struct btrfsic_block, + all_blocks_node); + struct list_head *elem_ref_to; + struct list_head *elem_ref_from; + + printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n", + btrfsic_get_block_type(state, b_all), + (unsigned long long)b_all->logical_bytenr, + b_all->dev_state->name, + (unsigned long long)b_all->dev_bytenr, + b_all->mirror_num); + + list_for_each(elem_ref_to, &b_all->ref_to_list) { + const struct btrfsic_block_link *const l = + list_entry(elem_ref_to, + struct btrfsic_block_link, + node_ref_to); + + printk(KERN_INFO " %c @%llu (%s/%llu/%d)" + " refers %u* to" + " %c @%llu (%s/%llu/%d)\n", + btrfsic_get_block_type(state, b_all), + (unsigned long long)b_all->logical_bytenr, + b_all->dev_state->name, + (unsigned long long)b_all->dev_bytenr, + b_all->mirror_num, + l->ref_cnt, + btrfsic_get_block_type(state, l->block_ref_to), + (unsigned long long) + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, + (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->mirror_num); + } + + list_for_each(elem_ref_from, &b_all->ref_from_list) { + const struct btrfsic_block_link *const l = + list_entry(elem_ref_from, + struct btrfsic_block_link, + node_ref_from); + + printk(KERN_INFO " %c @%llu (%s/%llu/%d)" + " is ref %u* from" + " %c @%llu (%s/%llu/%d)\n", + btrfsic_get_block_type(state, b_all), + (unsigned long long)b_all->logical_bytenr, + b_all->dev_state->name, + (unsigned long long)b_all->dev_bytenr, + b_all->mirror_num, + l->ref_cnt, + btrfsic_get_block_type(state, l->block_ref_from), + (unsigned long long) + l->block_ref_from->logical_bytenr, + l->block_ref_from->dev_state->name, + (unsigned long long) + l->block_ref_from->dev_bytenr, + l->block_ref_from->mirror_num); + } + + printk(KERN_INFO "\n"); + } +} + +/* + * Test whether the disk block contains a tree block (leaf or node) + * (note that this test fails for the super block) + */ +static int btrfsic_test_for_metadata(struct btrfsic_state *state, + const u8 *data, unsigned int size) +{ + struct btrfs_header *h; + u8 csum[BTRFS_CSUM_SIZE]; + u32 crc = ~(u32)0; + int fail = 0; + int crc_fail = 0; + + h = (struct btrfs_header *)data; + + if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) + fail++; + + crc = crc32c(crc, data + BTRFS_CSUM_SIZE, PAGE_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, csum); + if (memcmp(csum, h->csum, state->csum_size)) + crc_fail++; + + return fail || crc_fail; +} + +static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, + u64 dev_bytenr, + u8 *mapped_data, unsigned int len, + struct bio *bio, + int *bio_is_patched, + struct buffer_head *bh, + int submit_bio_bh_rw) +{ + int is_metadata; + struct btrfsic_block *block; + struct btrfsic_block_data_ctx block_ctx; + int ret; + struct btrfsic_state *state = dev_state->state; + struct 
block_device *bdev = dev_state->bdev; + + WARN_ON(len > PAGE_SIZE); + is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_data, len)); + if (NULL != bio_is_patched) + *bio_is_patched = 0; + + block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, + &state->block_hashtable); + if (NULL != block) { + u64 bytenr; + struct list_head *elem_ref_to; + struct list_head *tmp_ref_to; + + if (block->is_superblock) { + bytenr = le64_to_cpu(((struct btrfs_super_block *) + mapped_data)->bytenr); + is_metadata = 1; + if (state->print_mask & + BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { + printk(KERN_INFO + "[before new superblock is written]:\n"); + btrfsic_dump_tree_sub(state, block, 0); + } + } + if (is_metadata) { + if (!block->is_superblock) { + bytenr = le64_to_cpu(((struct btrfs_header *) + mapped_data)->bytenr); + btrfsic_cmp_log_and_dev_bytenr(state, bytenr, + dev_state, + dev_bytenr, + mapped_data); + } + if (block->logical_bytenr != bytenr) { + printk(KERN_INFO + "Written block @%llu (%s/%llu/%d)" + " found in hash table, %c," + " bytenr mismatch" + " (!= stored %llu).\n", + (unsigned long long)bytenr, + dev_state->name, + (unsigned long long)dev_bytenr, + block->mirror_num, + btrfsic_get_block_type(state, block), + (unsigned long long) + block->logical_bytenr); + block->logical_bytenr = bytenr; + } else if (state->print_mask & + BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "Written block @%llu (%s/%llu/%d)" + " found in hash table, %c.\n", + (unsigned long long)bytenr, + dev_state->name, + (unsigned long long)dev_bytenr, + block->mirror_num, + btrfsic_get_block_type(state, block)); + } else { + bytenr = block->logical_bytenr; + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "Written block @%llu (%s/%llu/%d)" + " found in hash table, %c.\n", + (unsigned long long)bytenr, + dev_state->name, + (unsigned long long)dev_bytenr, + block->mirror_num, + btrfsic_get_block_type(state, block)); + } + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "ref_to_list: %cE, ref_from_list: %cE\n", + list_empty(&block->ref_to_list) ? ' ' : '!', + list_empty(&block->ref_from_list) ? 
' ' : '!'); + if (btrfsic_is_block_ref_by_superblock(state, block, 0)) { + printk(KERN_INFO "btrfs: attempt to overwrite %c-block" + " @%llu (%s/%llu/%d), old(gen=%llu," + " objectid=%llu, type=%d, offset=%llu)," + " new(gen=%llu)," + " which is referenced by most recent superblock" + " (superblockgen=%llu)!\n", + btrfsic_get_block_type(state, block), + (unsigned long long)bytenr, + dev_state->name, + (unsigned long long)dev_bytenr, + block->mirror_num, + (unsigned long long)block->generation, + (unsigned long long) + le64_to_cpu(block->disk_key.objectid), + block->disk_key.type, + (unsigned long long) + le64_to_cpu(block->disk_key.offset), + (unsigned long long) + le64_to_cpu(((struct btrfs_header *) + mapped_data)->generation), + (unsigned long long) + state->max_superblock_generation); + btrfsic_dump_tree(state); + } + + if (!block->is_iodone && !block->never_written) { + printk(KERN_INFO "btrfs: attempt to overwrite %c-block" + " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," + " which is not yet iodone!\n", + btrfsic_get_block_type(state, block), + (unsigned long long)bytenr, + dev_state->name, + (unsigned long long)dev_bytenr, + block->mirror_num, + (unsigned long long)block->generation, + (unsigned long long) + le64_to_cpu(((struct btrfs_header *) + mapped_data)->generation)); + /* it would not be safe to go on */ + btrfsic_dump_tree(state); + return; + } + + /* + * Clear all references of this block. Do not free + * the block itself even if is not referenced anymore + * because it still carries valueable information + * like whether it was ever written and IO completed. + */ + list_for_each_safe(elem_ref_to, tmp_ref_to, + &block->ref_to_list) { + struct btrfsic_block_link *const l = + list_entry(elem_ref_to, + struct btrfsic_block_link, + node_ref_to); + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + btrfsic_print_rem_link(state, l); + l->ref_cnt--; + if (0 == l->ref_cnt) { + list_del(&l->node_ref_to); + list_del(&l->node_ref_from); + btrfsic_block_link_hashtable_remove(l); + btrfsic_block_link_free(l); + } + } + + if (block->is_superblock) + ret = btrfsic_map_superblock(state, bytenr, len, + bdev, &block_ctx); + else + ret = btrfsic_map_block(state, bytenr, len, + &block_ctx, 0); + if (ret) { + printk(KERN_INFO + "btrfsic: btrfsic_map_block(root @%llu)" + " failed!\n", (unsigned long long)bytenr); + return; + } + block_ctx.data = mapped_data; + /* the following is required in case of writes to mirrors, + * use the same that was used for the lookup */ + block_ctx.dev = dev_state; + block_ctx.dev_bytenr = dev_bytenr; + + if (is_metadata || state->include_extent_data) { + block->never_written = 0; + block->iodone_w_error = 0; + if (NULL != bio) { + block->is_iodone = 0; + BUG_ON(NULL == bio_is_patched); + if (!*bio_is_patched) { + block->orig_bio_bh_private = + bio->bi_private; + block->orig_bio_bh_end_io.bio = + bio->bi_end_io; + block->next_in_same_bio = NULL; + bio->bi_private = block; + bio->bi_end_io = btrfsic_bio_end_io; + *bio_is_patched = 1; + } else { + struct btrfsic_block *chained_block = + (struct btrfsic_block *) + bio->bi_private; + + BUG_ON(NULL == chained_block); + block->orig_bio_bh_private = + chained_block->orig_bio_bh_private; + block->orig_bio_bh_end_io.bio = + chained_block->orig_bio_bh_end_io. 
+ bio; + block->next_in_same_bio = chained_block; + bio->bi_private = block; + } + } else if (NULL != bh) { + block->is_iodone = 0; + block->orig_bio_bh_private = bh->b_private; + block->orig_bio_bh_end_io.bh = bh->b_end_io; + block->next_in_same_bio = NULL; + bh->b_private = block; + bh->b_end_io = btrfsic_bh_end_io; + } else { + block->is_iodone = 1; + block->orig_bio_bh_private = NULL; + block->orig_bio_bh_end_io.bio = NULL; + block->next_in_same_bio = NULL; + } + } + + block->flush_gen = dev_state->last_flush_gen + 1; + block->submit_bio_bh_rw = submit_bio_bh_rw; + if (is_metadata) { + block->logical_bytenr = bytenr; + block->is_metadata = 1; + if (block->is_superblock) { + ret = btrfsic_process_written_superblock( + state, + block, + (struct btrfs_super_block *) + mapped_data); + if (state->print_mask & + BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { + printk(KERN_INFO + "[after new superblock is written]:\n"); + btrfsic_dump_tree_sub(state, block, 0); + } + } else { + block->mirror_num = 0; /* unknown */ + ret = btrfsic_process_metablock( + state, + block, + &block_ctx, + (struct btrfs_header *) + block_ctx.data, + 0, 0); + } + if (ret) + printk(KERN_INFO + "btrfsic: btrfsic_process_metablock" + "(root @%llu) failed!\n", + (unsigned long long)dev_bytenr); + } else { + block->is_metadata = 0; + block->mirror_num = 0; /* unknown */ + block->generation = BTRFSIC_GENERATION_UNKNOWN; + if (!state->include_extent_data + && list_empty(&block->ref_from_list)) { + /* + * disk block is overwritten with extent + * data (not meta data) and we are configured + * to not include extent data: take the + * chance and free the block's memory + */ + btrfsic_block_hashtable_remove(block); + list_del(&block->all_blocks_node); + btrfsic_block_free(block); + } + } + btrfsic_release_block_ctx(&block_ctx); + } else { + /* block has not been found in hash table */ + u64 bytenr; + + if (!is_metadata) { + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO "Written block (%s/%llu/?)" + " !found in hash table, D.\n", + dev_state->name, + (unsigned long long)dev_bytenr); + if (!state->include_extent_data) + return; /* ignore that written D block */ + + /* this is getting ugly for the + * include_extent_data case... 
*/ + bytenr = 0; /* unknown */ + block_ctx.start = bytenr; + block_ctx.len = len; + block_ctx.bh = NULL; + } else { + bytenr = le64_to_cpu(((struct btrfs_header *) + mapped_data)->bytenr); + btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, + dev_bytenr, + mapped_data); + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "Written block @%llu (%s/%llu/?)" + " !found in hash table, M.\n", + (unsigned long long)bytenr, + dev_state->name, + (unsigned long long)dev_bytenr); + + ret = btrfsic_map_block(state, bytenr, len, &block_ctx, + 0); + if (ret) { + printk(KERN_INFO + "btrfsic: btrfsic_map_block(root @%llu)" + " failed!\n", + (unsigned long long)dev_bytenr); + return; + } + } + block_ctx.data = mapped_data; + /* the following is required in case of writes to mirrors, + * use the same that was used for the lookup */ + block_ctx.dev = dev_state; + block_ctx.dev_bytenr = dev_bytenr; + + block = btrfsic_block_alloc(); + if (NULL == block) { + printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); + btrfsic_release_block_ctx(&block_ctx); + return; + } + block->dev_state = dev_state; + block->dev_bytenr = dev_bytenr; + block->logical_bytenr = bytenr; + block->is_metadata = is_metadata; + block->never_written = 0; + block->iodone_w_error = 0; + block->mirror_num = 0; /* unknown */ + block->flush_gen = dev_state->last_flush_gen + 1; + block->submit_bio_bh_rw = submit_bio_bh_rw; + if (NULL != bio) { + block->is_iodone = 0; + BUG_ON(NULL == bio_is_patched); + if (!*bio_is_patched) { + block->orig_bio_bh_private = bio->bi_private; + block->orig_bio_bh_end_io.bio = bio->bi_end_io; + block->next_in_same_bio = NULL; + bio->bi_private = block; + bio->bi_end_io = btrfsic_bio_end_io; + *bio_is_patched = 1; + } else { + struct btrfsic_block *chained_block = + (struct btrfsic_block *) + bio->bi_private; + + BUG_ON(NULL == chained_block); + block->orig_bio_bh_private = + chained_block->orig_bio_bh_private; + block->orig_bio_bh_end_io.bio = + chained_block->orig_bio_bh_end_io.bio; + block->next_in_same_bio = chained_block; + bio->bi_private = block; + } + } else if (NULL != bh) { + block->is_iodone = 0; + block->orig_bio_bh_private = bh->b_private; + block->orig_bio_bh_end_io.bh = bh->b_end_io; + block->next_in_same_bio = NULL; + bh->b_private = block; + bh->b_end_io = btrfsic_bh_end_io; + } else { + block->is_iodone = 1; + block->orig_bio_bh_private = NULL; + block->orig_bio_bh_end_io.bio = NULL; + block->next_in_same_bio = NULL; + } + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "New written %c-block @%llu (%s/%llu/%d)\n", + is_metadata ? 'M' : 'D', + (unsigned long long)block->logical_bytenr, + block->dev_state->name, + (unsigned long long)block->dev_bytenr, + block->mirror_num); + list_add(&block->all_blocks_node, &state->all_blocks_list); + btrfsic_block_hashtable_add(block, &state->block_hashtable); + + if (is_metadata) { + ret = btrfsic_process_metablock(state, block, + &block_ctx, + (struct btrfs_header *) + block_ctx.data, 0, 0); + if (ret) + printk(KERN_INFO + "btrfsic: process_metablock(root @%llu)" + " failed!\n", + (unsigned long long)dev_bytenr); + } + btrfsic_release_block_ctx(&block_ctx); + } +} + +static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) +{ + struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private; + int iodone_w_error; + + /* mutex is not held! 
This is not save if IO is not yet completed + * on umount */ + iodone_w_error = 0; + if (bio_error_status) + iodone_w_error = 1; + + BUG_ON(NULL == block); + bp->bi_private = block->orig_bio_bh_private; + bp->bi_end_io = block->orig_bio_bh_end_io.bio; + + do { + struct btrfsic_block *next_block; + struct btrfsic_dev_state *const dev_state = block->dev_state; + + if ((dev_state->state->print_mask & + BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) + printk(KERN_INFO + "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", + bio_error_status, + btrfsic_get_block_type(dev_state->state, block), + (unsigned long long)block->logical_bytenr, + dev_state->name, + (unsigned long long)block->dev_bytenr, + block->mirror_num); + next_block = block->next_in_same_bio; + block->iodone_w_error = iodone_w_error; + if (block->submit_bio_bh_rw & REQ_FLUSH) { + dev_state->last_flush_gen++; + if ((dev_state->state->print_mask & + BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) + printk(KERN_INFO + "bio_end_io() new %s flush_gen=%llu\n", + dev_state->name, + (unsigned long long) + dev_state->last_flush_gen); + } + if (block->submit_bio_bh_rw & REQ_FUA) + block->flush_gen = 0; /* FUA completed means block is + * on disk */ + block->is_iodone = 1; /* for FLUSH, this releases the block */ + block = next_block; + } while (NULL != block); + + bp->bi_end_io(bp, bio_error_status); +} + +static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) +{ + struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private; + int iodone_w_error = !uptodate; + struct btrfsic_dev_state *dev_state; + + BUG_ON(NULL == block); + dev_state = block->dev_state; + if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) + printk(KERN_INFO + "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", + iodone_w_error, + btrfsic_get_block_type(dev_state->state, block), + (unsigned long long)block->logical_bytenr, + block->dev_state->name, + (unsigned long long)block->dev_bytenr, + block->mirror_num); + + block->iodone_w_error = iodone_w_error; + if (block->submit_bio_bh_rw & REQ_FLUSH) { + dev_state->last_flush_gen++; + if ((dev_state->state->print_mask & + BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) + printk(KERN_INFO + "bh_end_io() new %s flush_gen=%llu\n", + dev_state->name, + (unsigned long long)dev_state->last_flush_gen); + } + if (block->submit_bio_bh_rw & REQ_FUA) + block->flush_gen = 0; /* FUA completed means block is on disk */ + + bh->b_private = block->orig_bio_bh_private; + bh->b_end_io = block->orig_bio_bh_end_io.bh; + block->is_iodone = 1; /* for FLUSH, this releases the block */ + bh->b_end_io(bh, uptodate); +} + +static int btrfsic_process_written_superblock( + struct btrfsic_state *state, + struct btrfsic_block *const superblock, + struct btrfs_super_block *const super_hdr) +{ + int pass; + + superblock->generation = btrfs_super_generation(super_hdr); + if (!(superblock->generation > state->max_superblock_generation || + 0 == state->max_superblock_generation)) { + if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) + printk(KERN_INFO + "btrfsic: superblock @%llu (%s/%llu/%d)" + " with old gen %llu <= %llu\n", + (unsigned long long)superblock->logical_bytenr, + superblock->dev_state->name, + (unsigned long long)superblock->dev_bytenr, + superblock->mirror_num, + (unsigned long long) + btrfs_super_generation(super_hdr), + (unsigned long long) + state->max_superblock_generation); + } else { + if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) + printk(KERN_INFO + "btrfsic: got new superblock @%llu (%s/%llu/%d)" + " with new gen 
%llu > %llu\n", + (unsigned long long)superblock->logical_bytenr, + superblock->dev_state->name, + (unsigned long long)superblock->dev_bytenr, + superblock->mirror_num, + (unsigned long long) + btrfs_super_generation(super_hdr), + (unsigned long long) + state->max_superblock_generation); + + state->max_superblock_generation = + btrfs_super_generation(super_hdr); + state->latest_superblock = superblock; + } + + for (pass = 0; pass < 3; pass++) { + int ret; + u64 next_bytenr; + struct btrfsic_block *next_block; + struct btrfsic_block_data_ctx tmp_next_block_ctx; + struct btrfsic_block_link *l; + int num_copies; + int mirror_num; + const char *additional_string = NULL; + struct btrfs_disk_key tmp_disk_key; + + tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; + tmp_disk_key.offset = 0; + + switch (pass) { + case 0: + tmp_disk_key.objectid = + cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); + additional_string = "root "; + next_bytenr = btrfs_super_root(super_hdr); + if (state->print_mask & + BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) + printk(KERN_INFO "root@%llu\n", + (unsigned long long)next_bytenr); + break; + case 1: + tmp_disk_key.objectid = + cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); + additional_string = "chunk "; + next_bytenr = btrfs_super_chunk_root(super_hdr); + if (state->print_mask & + BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) + printk(KERN_INFO "chunk@%llu\n", + (unsigned long long)next_bytenr); + break; + case 2: + tmp_disk_key.objectid = + cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); + additional_string = "log "; + next_bytenr = btrfs_super_log_root(super_hdr); + if (0 == next_bytenr) + continue; + if (state->print_mask & + BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) + printk(KERN_INFO "log@%llu\n", + (unsigned long long)next_bytenr); + break; + } + + num_copies = + btrfs_num_copies(&state->root->fs_info->mapping_tree, + next_bytenr, PAGE_SIZE); + if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) + printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", + (unsigned long long)next_bytenr, num_copies); + for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { + int was_created; + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "btrfsic_process_written_superblock(" + "mirror_num=%d)\n", mirror_num); + ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, + &tmp_next_block_ctx, + mirror_num); + if (ret) { + printk(KERN_INFO + "btrfsic: btrfsic_map_block(@%llu," + " mirror=%d) failed!\n", + (unsigned long long)next_bytenr, + mirror_num); + return -1; + } + + next_block = btrfsic_block_lookup_or_add( + state, + &tmp_next_block_ctx, + additional_string, + 1, 0, 1, + mirror_num, + &was_created); + if (NULL == next_block) { + printk(KERN_INFO + "btrfsic: error, kmalloc failed!\n"); + btrfsic_release_block_ctx(&tmp_next_block_ctx); + return -1; + } + + next_block->disk_key = tmp_disk_key; + if (was_created) + next_block->generation = + BTRFSIC_GENERATION_UNKNOWN; + l = btrfsic_block_link_lookup_or_add( + state, + &tmp_next_block_ctx, + next_block, + superblock, + BTRFSIC_GENERATION_UNKNOWN); + btrfsic_release_block_ctx(&tmp_next_block_ctx); + if (NULL == l) + return -1; + } + } + + if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) { + WARN_ON(1); + btrfsic_dump_tree(state); + } + + return 0; +} + +static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, + struct btrfsic_block *const block, + int recursion_level) +{ + struct list_head *elem_ref_to; + int ret = 0; + + if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { + /* + * Note that 
this situation can happen and does not + * indicate an error in regular cases. It happens + * when disk blocks are freed and later reused. + * The check-integrity module is not aware of any + * block free operations, it just recognizes block + * write operations. Therefore it keeps the linkage + * information for a block until a block is + * rewritten. This can temporarily cause incorrect + * and even circular linkage informations. This + * causes no harm unless such blocks are referenced + * by the most recent super block. + */ + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "btrfsic: abort cyclic linkage (case 1).\n"); + + return ret; + } + + /* + * This algorithm is recursive because the amount of used stack + * space is very small and the max recursion depth is limited. + */ + list_for_each(elem_ref_to, &block->ref_to_list) { + const struct btrfsic_block_link *const l = + list_entry(elem_ref_to, struct btrfsic_block_link, + node_ref_to); + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "rl=%d, %c @%llu (%s/%llu/%d)" + " %u* refers to %c @%llu (%s/%llu/%d)\n", + recursion_level, + btrfsic_get_block_type(state, block), + (unsigned long long)block->logical_bytenr, + block->dev_state->name, + (unsigned long long)block->dev_bytenr, + block->mirror_num, + l->ref_cnt, + btrfsic_get_block_type(state, l->block_ref_to), + (unsigned long long) + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, + (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->mirror_num); + if (l->block_ref_to->never_written) { + printk(KERN_INFO "btrfs: attempt to write superblock" + " which references block %c @%llu (%s/%llu/%d)" + " which is never written!\n", + btrfsic_get_block_type(state, l->block_ref_to), + (unsigned long long) + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, + (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->mirror_num); + ret = -1; + } else if (!l->block_ref_to->is_iodone) { + printk(KERN_INFO "btrfs: attempt to write superblock" + " which references block %c @%llu (%s/%llu/%d)" + " which is not yet iodone!\n", + btrfsic_get_block_type(state, l->block_ref_to), + (unsigned long long) + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, + (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->mirror_num); + ret = -1; + } else if (l->parent_generation != + l->block_ref_to->generation && + BTRFSIC_GENERATION_UNKNOWN != + l->parent_generation && + BTRFSIC_GENERATION_UNKNOWN != + l->block_ref_to->generation) { + printk(KERN_INFO "btrfs: attempt to write superblock" + " which references block %c @%llu (%s/%llu/%d)" + " with generation %llu !=" + " parent generation %llu!\n", + btrfsic_get_block_type(state, l->block_ref_to), + (unsigned long long) + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, + (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->mirror_num, + (unsigned long long)l->block_ref_to->generation, + (unsigned long long)l->parent_generation); + ret = -1; + } else if (l->block_ref_to->flush_gen > + l->block_ref_to->dev_state->last_flush_gen) { + printk(KERN_INFO "btrfs: attempt to write superblock" + " which references block %c @%llu (%s/%llu/%d)" + " which is not flushed out of disk's write cache" + " (block flush_gen=%llu," + " dev->flush_gen=%llu)!\n", + btrfsic_get_block_type(state, l->block_ref_to), + (unsigned long long) + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, + 
(unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->mirror_num, + (unsigned long long)block->flush_gen, + (unsigned long long) + l->block_ref_to->dev_state->last_flush_gen); + ret = -1; + } else if (-1 == btrfsic_check_all_ref_blocks(state, + l->block_ref_to, + recursion_level + + 1)) { + ret = -1; + } + } + + return ret; +} + +static int btrfsic_is_block_ref_by_superblock( + const struct btrfsic_state *state, + const struct btrfsic_block *block, + int recursion_level) +{ + struct list_head *elem_ref_from; + + if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { + /* refer to comment at "abort cyclic linkage (case 1)" */ + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "btrfsic: abort cyclic linkage (case 2).\n"); + + return 0; + } + + /* + * This algorithm is recursive because the amount of used stack space + * is very small and the max recursion depth is limited. + */ + list_for_each(elem_ref_from, &block->ref_from_list) { + const struct btrfsic_block_link *const l = + list_entry(elem_ref_from, struct btrfsic_block_link, + node_ref_from); + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "rl=%d, %c @%llu (%s/%llu/%d)" + " is ref %u* from %c @%llu (%s/%llu/%d)\n", + recursion_level, + btrfsic_get_block_type(state, block), + (unsigned long long)block->logical_bytenr, + block->dev_state->name, + (unsigned long long)block->dev_bytenr, + block->mirror_num, + l->ref_cnt, + btrfsic_get_block_type(state, l->block_ref_from), + (unsigned long long) + l->block_ref_from->logical_bytenr, + l->block_ref_from->dev_state->name, + (unsigned long long) + l->block_ref_from->dev_bytenr, + l->block_ref_from->mirror_num); + if (l->block_ref_from->is_superblock && + state->latest_superblock->dev_bytenr == + l->block_ref_from->dev_bytenr && + state->latest_superblock->dev_state->bdev == + l->block_ref_from->dev_state->bdev) + return 1; + else if (btrfsic_is_block_ref_by_superblock(state, + l->block_ref_from, + recursion_level + + 1)) + return 1; + } + + return 0; +} + +static void btrfsic_print_add_link(const struct btrfsic_state *state, + const struct btrfsic_block_link *l) +{ + printk(KERN_INFO + "Add %u* link from %c @%llu (%s/%llu/%d)" + " to %c @%llu (%s/%llu/%d).\n", + l->ref_cnt, + btrfsic_get_block_type(state, l->block_ref_from), + (unsigned long long)l->block_ref_from->logical_bytenr, + l->block_ref_from->dev_state->name, + (unsigned long long)l->block_ref_from->dev_bytenr, + l->block_ref_from->mirror_num, + btrfsic_get_block_type(state, l->block_ref_to), + (unsigned long long)l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, + (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->mirror_num); +} + +static void btrfsic_print_rem_link(const struct btrfsic_state *state, + const struct btrfsic_block_link *l) +{ + printk(KERN_INFO + "Rem %u* link from %c @%llu (%s/%llu/%d)" + " to %c @%llu (%s/%llu/%d).\n", + l->ref_cnt, + btrfsic_get_block_type(state, l->block_ref_from), + (unsigned long long)l->block_ref_from->logical_bytenr, + l->block_ref_from->dev_state->name, + (unsigned long long)l->block_ref_from->dev_bytenr, + l->block_ref_from->mirror_num, + btrfsic_get_block_type(state, l->block_ref_to), + (unsigned long long)l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, + (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->mirror_num); +} + +static char btrfsic_get_block_type(const struct btrfsic_state *state, + const struct btrfsic_block *block) +{ + if (block->is_superblock && 
+ state->latest_superblock->dev_bytenr == block->dev_bytenr && + state->latest_superblock->dev_state->bdev == block->dev_state->bdev) + return 'S'; + else if (block->is_superblock) + return 's'; + else if (block->is_metadata) + return 'M'; + else + return 'D'; +} + +static void btrfsic_dump_tree(const struct btrfsic_state *state) +{ + btrfsic_dump_tree_sub(state, state->latest_superblock, 0); +} + +static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, + const struct btrfsic_block *block, + int indent_level) +{ + struct list_head *elem_ref_to; + int indent_add; + static char buf[80]; + int cursor_position; + + /* + * Should better fill an on-stack buffer with a complete line and + * dump it at once when it is time to print a newline character. + */ + + /* + * This algorithm is recursive because the amount of used stack space + * is very small and the max recursion depth is limited. + */ + indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", + btrfsic_get_block_type(state, block), + (unsigned long long)block->logical_bytenr, + block->dev_state->name, + (unsigned long long)block->dev_bytenr, + block->mirror_num); + if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { + printk("[...]\n"); + return; + } + printk(buf); + indent_level += indent_add; + if (list_empty(&block->ref_to_list)) { + printk("\n"); + return; + } + if (block->mirror_num > 1 && + !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) { + printk(" [...]\n"); + return; + } + + cursor_position = indent_level; + list_for_each(elem_ref_to, &block->ref_to_list) { + const struct btrfsic_block_link *const l = + list_entry(elem_ref_to, struct btrfsic_block_link, + node_ref_to); + + while (cursor_position < indent_level) { + printk(" "); + cursor_position++; + } + if (l->ref_cnt > 1) + indent_add = sprintf(buf, " %d*--> ", l->ref_cnt); + else + indent_add = sprintf(buf, " --> "); + if (indent_level + indent_add > + BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { + printk("[...]\n"); + cursor_position = 0; + continue; + } + + printk(buf); + + btrfsic_dump_tree_sub(state, l->block_ref_to, + indent_level + indent_add); + cursor_position = 0; + } +} + +static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( + struct btrfsic_state *state, + struct btrfsic_block_data_ctx *next_block_ctx, + struct btrfsic_block *next_block, + struct btrfsic_block *from_block, + u64 parent_generation) +{ + struct btrfsic_block_link *l; + + l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev, + next_block_ctx->dev_bytenr, + from_block->dev_state->bdev, + from_block->dev_bytenr, + &state->block_link_hashtable); + if (NULL == l) { + l = btrfsic_block_link_alloc(); + if (NULL == l) { + printk(KERN_INFO + "btrfsic: error, kmalloc" " failed!\n"); + return NULL; + } + + l->block_ref_to = next_block; + l->block_ref_from = from_block; + l->ref_cnt = 1; + l->parent_generation = parent_generation; + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + btrfsic_print_add_link(state, l); + + list_add(&l->node_ref_to, &from_block->ref_to_list); + list_add(&l->node_ref_from, &next_block->ref_from_list); + + btrfsic_block_link_hashtable_add(l, + &state->block_link_hashtable); + } else { + l->ref_cnt++; + l->parent_generation = parent_generation; + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + btrfsic_print_add_link(state, l); + } + + return l; +} + +static struct btrfsic_block *btrfsic_block_lookup_or_add( + struct btrfsic_state *state, + struct btrfsic_block_data_ctx *block_ctx, + const char *additional_string, 
+ int is_metadata, + int is_iodone, + int never_written, + int mirror_num, + int *was_created) +{ + struct btrfsic_block *block; + + block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev, + block_ctx->dev_bytenr, + &state->block_hashtable); + if (NULL == block) { + struct btrfsic_dev_state *dev_state; + + block = btrfsic_block_alloc(); + if (NULL == block) { + printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); + return NULL; + } + dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev); + if (NULL == dev_state) { + printk(KERN_INFO + "btrfsic: error, lookup dev_state failed!\n"); + btrfsic_block_free(block); + return NULL; + } + block->dev_state = dev_state; + block->dev_bytenr = block_ctx->dev_bytenr; + block->logical_bytenr = block_ctx->start; + block->is_metadata = is_metadata; + block->is_iodone = is_iodone; + block->never_written = never_written; + block->mirror_num = mirror_num; + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + printk(KERN_INFO + "New %s%c-block @%llu (%s/%llu/%d)\n", + additional_string, + btrfsic_get_block_type(state, block), + (unsigned long long)block->logical_bytenr, + dev_state->name, + (unsigned long long)block->dev_bytenr, + mirror_num); + list_add(&block->all_blocks_node, &state->all_blocks_list); + btrfsic_block_hashtable_add(block, &state->block_hashtable); + if (NULL != was_created) + *was_created = 1; + } else { + if (NULL != was_created) + *was_created = 0; + } + + return block; +} + +static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, + u64 bytenr, + struct btrfsic_dev_state *dev_state, + u64 dev_bytenr, char *data) +{ + int num_copies; + int mirror_num; + int ret; + struct btrfsic_block_data_ctx block_ctx; + int match = 0; + + num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, + bytenr, PAGE_SIZE); + + for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { + ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, + &block_ctx, mirror_num); + if (ret) { + printk(KERN_INFO "btrfsic:" + " btrfsic_map_block(logical @%llu," + " mirror %d) failed!\n", + (unsigned long long)bytenr, mirror_num); + continue; + } + + if (dev_state->bdev == block_ctx.dev->bdev && + dev_bytenr == block_ctx.dev_bytenr) { + match++; + btrfsic_release_block_ctx(&block_ctx); + break; + } + btrfsic_release_block_ctx(&block_ctx); + } + + if (!match) { + printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," + " buffer->log_bytenr=%llu, submit_bio(bdev=%s," + " phys_bytenr=%llu)!\n", + (unsigned long long)bytenr, dev_state->name, + (unsigned long long)dev_bytenr); + for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { + ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, + &block_ctx, mirror_num); + if (ret) + continue; + + printk(KERN_INFO "Read logical bytenr @%llu maps to" + " (%s/%llu/%d)\n", + (unsigned long long)bytenr, + block_ctx.dev->name, + (unsigned long long)block_ctx.dev_bytenr, + mirror_num); + } + WARN_ON(1); + } +} + +static struct btrfsic_dev_state *btrfsic_dev_state_lookup( + struct block_device *bdev) +{ + struct btrfsic_dev_state *ds; + + ds = btrfsic_dev_state_hashtable_lookup(bdev, + &btrfsic_dev_state_hashtable); + return ds; +} + +int btrfsic_submit_bh(int rw, struct buffer_head *bh) +{ + struct btrfsic_dev_state *dev_state; + + if (!btrfsic_is_initialized) + return submit_bh(rw, bh); + + mutex_lock(&btrfsic_mutex); + /* since btrfsic_submit_bh() might also be called before + * btrfsic_mount(), this might return NULL */ 
+ dev_state = btrfsic_dev_state_lookup(bh->b_bdev); + + /* Only called to write the superblock (incl. FLUSH/FUA) */ + if (NULL != dev_state && + (rw & WRITE) && bh->b_size > 0) { + u64 dev_bytenr; + + dev_bytenr = 4096 * bh->b_blocknr; + if (dev_state->state->print_mask & + BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) + printk(KERN_INFO + "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," + " size=%lu, data=%p, bdev=%p)\n", + rw, bh->b_blocknr, + (unsigned long long)dev_bytenr, bh->b_size, + bh->b_data, bh->b_bdev); + btrfsic_process_written_block(dev_state, dev_bytenr, + bh->b_data, bh->b_size, NULL, + NULL, bh, rw); + } else if (NULL != dev_state && (rw & REQ_FLUSH)) { + if (dev_state->state->print_mask & + BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) + printk(KERN_INFO + "submit_bh(rw=0x%x) FLUSH, bdev=%p)\n", + rw, bh->b_bdev); + if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { + if ((dev_state->state->print_mask & + (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | + BTRFSIC_PRINT_MASK_VERBOSE))) + printk(KERN_INFO + "btrfsic_submit_bh(%s) with FLUSH" + " but dummy block already in use" + " (ignored)!\n", + dev_state->name); + } else { + struct btrfsic_block *const block = + &dev_state->dummy_block_for_bio_bh_flush; + + block->is_iodone = 0; + block->never_written = 0; + block->iodone_w_error = 0; + block->flush_gen = dev_state->last_flush_gen + 1; + block->submit_bio_bh_rw = rw; + block->orig_bio_bh_private = bh->b_private; + block->orig_bio_bh_end_io.bh = bh->b_end_io; + block->next_in_same_bio = NULL; + bh->b_private = block; + bh->b_end_io = btrfsic_bh_end_io; + } + } + mutex_unlock(&btrfsic_mutex); + return submit_bh(rw, bh); +} + +void btrfsic_submit_bio(int rw, struct bio *bio) +{ + struct btrfsic_dev_state *dev_state; + + if (!btrfsic_is_initialized) { + submit_bio(rw, bio); + return; + } + + mutex_lock(&btrfsic_mutex); + /* since btrfsic_submit_bio() is also called before + * btrfsic_mount(), this might return NULL */ + dev_state = btrfsic_dev_state_lookup(bio->bi_bdev); + if (NULL != dev_state && + (rw & WRITE) && NULL != bio->bi_io_vec) { + unsigned int i; + u64 dev_bytenr; + int bio_is_patched; + + dev_bytenr = 512 * bio->bi_sector; + bio_is_patched = 0; + if (dev_state->state->print_mask & + BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) + printk(KERN_INFO + "submit_bio(rw=0x%x, bi_vcnt=%u," + " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", + rw, bio->bi_vcnt, bio->bi_sector, + (unsigned long long)dev_bytenr, + bio->bi_bdev); + + for (i = 0; i < bio->bi_vcnt; i++) { + u8 *mapped_data; + + mapped_data = kmap(bio->bi_io_vec[i].bv_page); + if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | + BTRFSIC_PRINT_MASK_VERBOSE) == + (dev_state->state->print_mask & + (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | + BTRFSIC_PRINT_MASK_VERBOSE))) + printk(KERN_INFO + "#%u: page=%p, mapped=%p, len=%u," + " offset=%u\n", + i, bio->bi_io_vec[i].bv_page, + mapped_data, + bio->bi_io_vec[i].bv_len, + bio->bi_io_vec[i].bv_offset); + btrfsic_process_written_block(dev_state, dev_bytenr, + mapped_data, + bio->bi_io_vec[i].bv_len, + bio, &bio_is_patched, + NULL, rw); + kunmap(bio->bi_io_vec[i].bv_page); + dev_bytenr += bio->bi_io_vec[i].bv_len; + } + } else if (NULL != dev_state && (rw & REQ_FLUSH)) { + if (dev_state->state->print_mask & + BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) + printk(KERN_INFO + "submit_bio(rw=0x%x) FLUSH, bdev=%p)\n", + rw, bio->bi_bdev); + if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { + if ((dev_state->state->print_mask & + (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | + BTRFSIC_PRINT_MASK_VERBOSE))) + printk(KERN_INFO + 
"btrfsic_submit_bio(%s) with FLUSH" + " but dummy block already in use" + " (ignored)!\n", + dev_state->name); + } else { + struct btrfsic_block *const block = + &dev_state->dummy_block_for_bio_bh_flush; + + block->is_iodone = 0; + block->never_written = 0; + block->iodone_w_error = 0; + block->flush_gen = dev_state->last_flush_gen + 1; + block->submit_bio_bh_rw = rw; + block->orig_bio_bh_private = bio->bi_private; + block->orig_bio_bh_end_io.bio = bio->bi_end_io; + block->next_in_same_bio = NULL; + bio->bi_private = block; + bio->bi_end_io = btrfsic_bio_end_io; + } + } + mutex_unlock(&btrfsic_mutex); + + submit_bio(rw, bio); +} + +int btrfsic_mount(struct btrfs_root *root, + struct btrfs_fs_devices *fs_devices, + int including_extent_data, u32 print_mask) +{ + int ret; + struct btrfsic_state *state; + struct list_head *dev_head = &fs_devices->devices; + struct btrfs_device *device; + + state = kzalloc(sizeof(*state), GFP_NOFS); + if (NULL == state) { + printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); + return -1; + } + + if (!btrfsic_is_initialized) { + mutex_init(&btrfsic_mutex); + btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable); + btrfsic_is_initialized = 1; + } + mutex_lock(&btrfsic_mutex); + state->root = root; + state->print_mask = print_mask; + state->include_extent_data = including_extent_data; + state->csum_size = 0; + INIT_LIST_HEAD(&state->all_blocks_list); + btrfsic_block_hashtable_init(&state->block_hashtable); + btrfsic_block_link_hashtable_init(&state->block_link_hashtable); + state->max_superblock_generation = 0; + state->latest_superblock = NULL; + + list_for_each_entry(device, dev_head, dev_list) { + struct btrfsic_dev_state *ds; + char *p; + + if (!device->bdev || !device->name) + continue; + + ds = btrfsic_dev_state_alloc(); + if (NULL == ds) { + printk(KERN_INFO + "btrfs check-integrity: kmalloc() failed!\n"); + mutex_unlock(&btrfsic_mutex); + return -1; + } + ds->bdev = device->bdev; + ds->state = state; + bdevname(ds->bdev, ds->name); + ds->name[BDEVNAME_SIZE - 1] = '\0'; + for (p = ds->name; *p != '\0'; p++); + while (p > ds->name && *p != '/') + p--; + if (*p == '/') + p++; + strlcpy(ds->name, p, sizeof(ds->name)); + btrfsic_dev_state_hashtable_add(ds, + &btrfsic_dev_state_hashtable); + } + + ret = btrfsic_process_superblock(state, fs_devices); + if (0 != ret) { + mutex_unlock(&btrfsic_mutex); + btrfsic_unmount(root, fs_devices); + return ret; + } + + if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE) + btrfsic_dump_database(state); + if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE) + btrfsic_dump_tree(state); + + mutex_unlock(&btrfsic_mutex); + return 0; +} + +void btrfsic_unmount(struct btrfs_root *root, + struct btrfs_fs_devices *fs_devices) +{ + struct list_head *elem_all; + struct list_head *tmp_all; + struct btrfsic_state *state; + struct list_head *dev_head = &fs_devices->devices; + struct btrfs_device *device; + + if (!btrfsic_is_initialized) + return; + + mutex_lock(&btrfsic_mutex); + + state = NULL; + list_for_each_entry(device, dev_head, dev_list) { + struct btrfsic_dev_state *ds; + + if (!device->bdev || !device->name) + continue; + + ds = btrfsic_dev_state_hashtable_lookup( + device->bdev, + &btrfsic_dev_state_hashtable); + if (NULL != ds) { + state = ds->state; + btrfsic_dev_state_hashtable_remove(ds); + btrfsic_dev_state_free(ds); + } + } + + if (NULL == state) { + printk(KERN_INFO + "btrfsic: error, cannot find state information" + " on umount!\n"); + mutex_unlock(&btrfsic_mutex); + return; + } + + /* 
+ * Don't care about keeping the lists' state up to date, + * just free all memory that was allocated dynamically. + * Free the blocks and the block_links. + */ + list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) { + struct btrfsic_block *const b_all = + list_entry(elem_all, struct btrfsic_block, + all_blocks_node); + struct list_head *elem_ref_to; + struct list_head *tmp_ref_to; + + list_for_each_safe(elem_ref_to, tmp_ref_to, + &b_all->ref_to_list) { + struct btrfsic_block_link *const l = + list_entry(elem_ref_to, + struct btrfsic_block_link, + node_ref_to); + + if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + btrfsic_print_rem_link(state, l); + + l->ref_cnt--; + if (0 == l->ref_cnt) + btrfsic_block_link_free(l); + } + + if (b_all->is_iodone) + btrfsic_block_free(b_all); + else + printk(KERN_INFO "btrfs: attempt to free %c-block" + " @%llu (%s/%llu/%d) on umount which is" + " not yet iodone!\n", + btrfsic_get_block_type(state, b_all), + (unsigned long long)b_all->logical_bytenr, + b_all->dev_state->name, + (unsigned long long)b_all->dev_bytenr, + b_all->mirror_num); + } + + mutex_unlock(&btrfsic_mutex); + + kfree(state); +} diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h new file mode 100644 index 00000000000..8b59175cc50 --- /dev/null +++ b/fs/btrfs/check-integrity.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) STRATO AG 2011. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#if !defined(__BTRFS_CHECK_INTEGRITY__) +#define __BTRFS_CHECK_INTEGRITY__ + +#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY +int btrfsic_submit_bh(int rw, struct buffer_head *bh); +void btrfsic_submit_bio(int rw, struct bio *bio); +#else +#define btrfsic_submit_bh submit_bh +#define btrfsic_submit_bio submit_bio +#endif + +int btrfsic_mount(struct btrfs_root *root, + struct btrfs_fs_devices *fs_devices, + int including_extent_data, u32 print_mask); +void btrfsic_unmount(struct btrfs_root *root, + struct btrfs_fs_devices *fs_devices); + +#endif -- cgit v1.2.3 From c975dd469d748ce619c510050d4fb407c2398591 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Tue, 1 Nov 2011 17:06:04 +0100 Subject: Btrfs: add config option to enable btrfs integrity check Added the BTRFS_FS_CHECK_INTEGRITY option to Kconfig. It depends on BTRFS_FS. Signed-off-by: Stefan Behrens --- fs/btrfs/Kconfig | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index ecb9fd3be14..d33f01c08b6 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -31,3 +31,22 @@ config BTRFS_FS_POSIX_ACL Linux website . If you don't know what Access Control Lists are, say N + +config BTRFS_FS_CHECK_INTEGRITY + bool "Btrfs with integrity check tool compiled in (DANGEROUS)" + depends on BTRFS_FS + help + Adds code that examines all block write requests (including + writes of the super block). 
The goal is to verify that the + state of the filesystem on disk is always consistent, i.e., + after a power-loss or kernel panic event the filesystem is + in a consistent state. + + If the integrity check tool is included and activated in + the mount options, plenty of kernel memory is used, and + plenty of additional CPU cycles are spent. Enabling this + functionality is not intended for normal use. + + In most cases, unless you are a btrfs developer who needs + to verify the integrity of (super)-block write requests + during the run of a regression test, say N -- cgit v1.2.3 From f11e4d7f533249ddfa110116200c5c3a509f9218 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Tue, 1 Nov 2011 17:06:39 +0100 Subject: Btrfs: Makefile changes to optionally include btrfs integrity check If the btrfs integrity check is enabled, the files required to implement the checks are included in the build. Signed-off-by: Stefan Behrens --- fs/btrfs/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index c0ddfd29c5e..bc5b3556cee 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -11,3 +11,4 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ reada.o backref.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o +btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o -- cgit v1.2.3 From 21adbd5cbb5344a3fca6bb7ddb2ab6cb03c44546 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Wed, 9 Nov 2011 13:44:05 +0100 Subject: Btrfs: integrate integrity check module into btrfs This is the last part of the patch series. It modifies the btrfs code to use the integrity check module if configured to do so with the define BTRFS_FS_CHECK_INTEGRITY. If this define is not set, the only effective change is that code is added that handles the mount option to activate the integrity check. If the mount option is set and the define BTRFS_FS_CHECK_INTEGRITY is not set, that code complains in the log and the mount fails with EINVAL. Add the mount option to activate the usage of the integrity check code. Add invocation of btrfs integrity check code init and cleanup function on mount and umount, respectively. Add hook to call btrfs integrity check code version of submit_bh/submit_bio. 
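A minimal sketch of the call-site pattern described above (the helper below is hypothetical and not part of this patch; the real conversions appear in the diff that follows). Because check-integrity.h falls back to plain submit_bh()/submit_bio() when CONFIG_BTRFS_FS_CHECK_INTEGRITY is not set, a write path can use the wrappers unconditionally:

    /*
     * Hypothetical example, assuming the check-integrity.h added above:
     * with the config option enabled and "check_int" in the mount options
     * this write is recorded by the integrity checker; otherwise the call
     * compiles down to an ordinary submit_bh().
     */
    #include <linux/buffer_head.h>
    #include "check-integrity.h"

    static int example_write_super_copy(struct buffer_head *bh)
    {
            return btrfsic_submit_bh(WRITE_FUA, bh);
    }

The same applies to the bio-based paths via btrfsic_submit_bio(), as the changes to disk-io.c, extent_io.c, scrub.c and volumes.c below show.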
Signed-off-by: Stefan Behrens --- fs/btrfs/ctree.h | 8 +++++++- fs/btrfs/disk-io.c | 26 ++++++++++++++++++++++++-- fs/btrfs/extent_io.c | 5 +++-- fs/btrfs/scrub.c | 5 +++-- fs/btrfs/super.c | 39 ++++++++++++++++++++++++++++++++++++++- fs/btrfs/volumes.c | 7 ++++--- 6 files changed, 79 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 67385033323..39f6188688e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -971,7 +971,7 @@ struct btrfs_fs_info { * is required instead of the faster short fsync log commits */ u64 last_trans_log_full_commit; - unsigned long mount_opt:20; + unsigned long mount_opt:21; unsigned long compress_type:4; u64 max_inline; u64 alloc_start; @@ -1155,6 +1155,10 @@ struct btrfs_fs_info { int scrub_workers_refcnt; struct btrfs_workers scrub_workers; +#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY + u32 check_integrity_print_mask; +#endif + /* filesystem state */ u64 fs_state; @@ -1413,6 +1417,8 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) #define BTRFS_MOUNT_RECOVERY (1 << 18) +#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 19) +#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 20) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3f9d5551e58..f363c6d9c3d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -43,6 +43,7 @@ #include "tree-log.h" #include "free-space-cache.h" #include "inode-map.h" +#include "check-integrity.h" static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); @@ -2001,6 +2002,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->scrub_pause_wait); init_rwsem(&fs_info->scrub_super_lock); fs_info->scrub_workers_refcnt = 0; +#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY + fs_info->check_integrity_print_mask = 0; +#endif sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); @@ -2356,6 +2360,19 @@ retry_root_backup: btrfs_set_opt(fs_info->mount_opt, SSD); } +#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY + if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) { + ret = btrfsic_mount(tree_root, fs_devices, + btrfs_test_opt(tree_root, + CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ? + 1 : 0, + fs_info->check_integrity_print_mask); + if (ret) + printk(KERN_WARNING "btrfs: failed to initialize" + " integrity check module %s\n", sb->s_id); + } +#endif + /* do not make disk changes in broken FS */ if (btrfs_super_log_root(disk_super) != 0 && !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { @@ -2634,7 +2651,7 @@ static int write_dev_supers(struct btrfs_device *device, * we fua the first super. The others we allow * to go down lazy. 
*/ - ret = submit_bh(WRITE_FUA, bh); + ret = btrfsic_submit_bh(WRITE_FUA, bh); if (ret) errors++; } @@ -2711,7 +2728,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) device->flush_bio = bio; bio_get(bio); - submit_bio(WRITE_FLUSH, bio); + btrfsic_submit_bio(WRITE_FLUSH, bio); return 0; } @@ -3057,6 +3074,11 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->caching_workers); btrfs_stop_workers(&fs_info->readahead_workers); +#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY + if (btrfs_test_opt(root, CHECK_INTEGRITY)) + btrfsic_unmount(root, fs_info->fs_devices); +#endif + btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 49f3c9dc09f..246669296e0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -18,6 +18,7 @@ #include "ctree.h" #include "btrfs_inode.h" #include "volumes.h" +#include "check-integrity.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -1895,7 +1896,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, } bio->bi_bdev = dev->bdev; bio_add_page(bio, page, length, start-page_offset(page)); - submit_bio(WRITE_SYNC, bio); + btrfsic_submit_bio(WRITE_SYNC, bio); wait_for_completion(&compl); if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { @@ -2393,7 +2394,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio, mirror_num, bio_flags, start); else - submit_bio(rw, bio); + btrfsic_submit_bio(rw, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ddf2c90d3fc..567e148caca 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -25,6 +25,7 @@ #include "transaction.h" #include "backref.h" #include "extent_io.h" +#include "check-integrity.h" /* * This is only the first step towards a full-features scrub. 
It reads all @@ -732,7 +733,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, bio_add_page(bio, page, PAGE_SIZE, 0); bio->bi_end_io = scrub_fixup_end_io; bio->bi_private = &complete; - submit_bio(rw, bio); + btrfsic_submit_bio(rw, bio); /* this will also unplug the queue */ wait_for_completion(&complete); @@ -958,7 +959,7 @@ static int scrub_submit(struct scrub_dev *sdev) sdev->curr = -1; atomic_inc(&sdev->in_flight); - submit_bio(READ, sbio->bio); + btrfsic_submit_bio(READ, sbio->bio); return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 34a8b6112ea..22a2015f1d7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -165,7 +165,10 @@ enum { Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, - Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err, + Opt_inode_cache, Opt_no_space_cache, Opt_recovery, + Opt_check_integrity, Opt_check_integrity_including_extent_data, + Opt_check_integrity_print_mask, + Opt_err, }; static match_table_t tokens = { @@ -200,6 +203,9 @@ static match_table_t tokens = { {Opt_inode_cache, "inode_cache"}, {Opt_no_space_cache, "nospace_cache"}, {Opt_recovery, "recovery"}, + {Opt_check_integrity, "check_int"}, + {Opt_check_integrity_including_extent_data, "check_int_data"}, + {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, {Opt_err, NULL}, }; @@ -398,6 +404,37 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: enabling auto recovery"); btrfs_set_opt(info->mount_opt, RECOVERY); break; +#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY + case Opt_check_integrity_including_extent_data: + printk(KERN_INFO "btrfs: enabling check integrity" + " including extent data\n"); + btrfs_set_opt(info->mount_opt, + CHECK_INTEGRITY_INCLUDING_EXTENT_DATA); + btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); + break; + case Opt_check_integrity: + printk(KERN_INFO "btrfs: enabling check integrity\n"); + btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); + break; + case Opt_check_integrity_print_mask: + intarg = 0; + match_int(&args[0], &intarg); + if (intarg) { + info->check_integrity_print_mask = intarg; + printk(KERN_INFO "btrfs:" + " check_integrity_print_mask 0x%x\n", + info->check_integrity_print_mask); + } + break; +#else + case Opt_check_integrity_including_extent_data: + case Opt_check_integrity: + case Opt_check_integrity_print_mask: + printk(KERN_ERR "btrfs: support for check_integrity*" + " not compiled in!\n"); + ret = -EINVAL; + goto out; +#endif case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option " "'%s'\n", p); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f4b839fd3c9..821334f6e3a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -32,6 +32,7 @@ #include "print-tree.h" #include "volumes.h" #include "async-thread.h" +#include "check-integrity.h" static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -246,7 +247,7 @@ loop_lock: sync_pending = 0; } - submit_bio(cur->bi_rw, cur); + btrfsic_submit_bio(cur->bi_rw, cur); num_run++; batch_run++; if (need_resched()) @@ -3304,7 +3305,7 @@ static noinline int schedule_bio(struct btrfs_root *root, /* don't bother with additional async steps for reads, right now */ if (!(rw & REQ_WRITE)) { bio_get(bio); - submit_bio(rw, bio); + btrfsic_submit_bio(rw, bio); bio_put(bio); return 0; } @@ -3399,7 +3400,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, 
struct bio *bio, if (async_submit) schedule_bio(root, dev, rw, bio); else - submit_bio(rw, bio); + btrfsic_submit_bio(rw, bio); } else { bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; bio->bi_sector = logical >> 9; -- cgit v1.2.3 From 22cdfca5641817060dd724a9c30442f5c0675fcd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 21 Dec 2011 14:14:31 -0500 Subject: ext4: remove unneeded file_remove_suid() from ext4_ioctl() In the code to support EXT4_IOC_MOVE_EXT, ext4_ioctl calls file_remove_suid() after the call to ext4_move_extents() if any extents has been moved. There are at least three things wrong with this. First, file_remove_suid() should be called with i_mutex down, which is not here. Second, it should be called before the donor file has been modified, to avoid a potential race condition. Third, and most importantly, it's pointless, because ext4_file_extents() already checks if the donor file has the setuid or setgid bit set, and will return an error in that case. So the first two objections don't really matter, since file_remove_suid() will never need to modify the inode in any case. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ioctl.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a56796814d6..ff1aab7cd6e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -247,8 +247,6 @@ setversion_out: err = ext4_move_extents(filp, donor_filp, me.orig_start, me.donor_start, me.len, &me.moved_len); mnt_drop_write(filp->f_path.mnt); - if (me.moved_len > 0) - file_remove_suid(donor_filp); if (copy_to_user((struct move_extent __user *)arg, &me, sizeof(me))) -- cgit v1.2.3 From da5c81356426c476112f2b59fe64bdb1b37f079d Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 12:29:12 +0200 Subject: Btrfs: generic data structure to build unique lists ulist is a generic data structures to hold a collection of unique u64 values. The only operations it supports is adding to the list and enumerating it. It is possible to store an auxiliary value along with the key. The implementation is preliminary and can probably be sped up significantly. It is used by btrfs_find_all_roots() quota to translate recursions into iterative loops. Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/Makefile | 2 +- fs/btrfs/ulist.c | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ulist.h | 68 +++++++++++++++++ 3 files changed, 289 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/ulist.c create mode 100644 fs/btrfs/ulist.h (limited to 'fs') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index c0ddfd29c5e..70798407b9a 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -8,6 +8,6 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ - reada.o backref.o + reada.o backref.o ulist.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c new file mode 100644 index 00000000000..12f5147bd2b --- /dev/null +++ b/fs/btrfs/ulist.c @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2011 STRATO AG + * written by Arne Jansen + * Distributed under the GNU GPL license version 2. + */ + +#include +#include +#include "ulist.h" + +/* + * ulist is a generic data structure to hold a collection of unique u64 + * values. 
The only operations it supports is adding to the list and + * enumerating it. + * It is possible to store an auxiliary value along with the key. + * + * The implementation is preliminary and can probably be sped up + * significantly. A first step would be to store the values in an rbtree + * as soon as ULIST_SIZE is exceeded. + * + * A sample usage for ulists is the enumeration of directed graphs without + * visiting a node twice. The pseudo-code could look like this: + * + * ulist = ulist_alloc(); + * ulist_add(ulist, root); + * elem = NULL; + * + * while ((elem = ulist_next(ulist, elem)) { + * for (all child nodes n in elem) + * ulist_add(ulist, n); + * do something useful with the node; + * } + * ulist_free(ulist); + * + * This assumes the graph nodes are adressable by u64. This stems from the + * usage for tree enumeration in btrfs, where the logical addresses are + * 64 bit. + * + * It is also useful for tree enumeration which could be done elegantly + * recursively, but is not possible due to kernel stack limitations. The + * loop would be similar to the above. + */ + +/** + * ulist_init - freshly initialize a ulist + * @ulist: the ulist to initialize + * + * Note: don't use this function to init an already used ulist, use + * ulist_reinit instead. + */ +void ulist_init(struct ulist *ulist) +{ + ulist->nnodes = 0; + ulist->nodes = ulist->int_nodes; + ulist->nodes_alloced = ULIST_SIZE; +} +EXPORT_SYMBOL(ulist_init); + +/** + * ulist_fini - free up additionally allocated memory for the ulist + * @ulist: the ulist from which to free the additional memory + * + * This is useful in cases where the base 'struct ulist' has been statically + * allocated. + */ +void ulist_fini(struct ulist *ulist) +{ + /* + * The first ULIST_SIZE elements are stored inline in struct ulist. + * Only if more elements are alocated they need to be freed. + */ + if (ulist->nodes_alloced > ULIST_SIZE) + kfree(ulist->nodes); + ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */ +} +EXPORT_SYMBOL(ulist_fini); + +/** + * ulist_reinit - prepare a ulist for reuse + * @ulist: ulist to be reused + * + * Free up all additional memory allocated for the list elements and reinit + * the ulist. + */ +void ulist_reinit(struct ulist *ulist) +{ + ulist_fini(ulist); + ulist_init(ulist); +} +EXPORT_SYMBOL(ulist_reinit); + +/** + * ulist_alloc - dynamically allocate a ulist + * @gfp_mask: allocation flags to for base allocation + * + * The allocated ulist will be returned in an initialized state. + */ +struct ulist *ulist_alloc(unsigned long gfp_mask) +{ + struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); + + if (!ulist) + return NULL; + + ulist_init(ulist); + + return ulist; +} +EXPORT_SYMBOL(ulist_alloc); + +/** + * ulist_free - free dynamically allocated ulist + * @ulist: ulist to free + * + * It is not necessary to call ulist_fini before. + */ +void ulist_free(struct ulist *ulist) +{ + if (!ulist) + return; + ulist_fini(ulist); + kfree(ulist); +} +EXPORT_SYMBOL(ulist_free); + +/** + * ulist_add - add an element to the ulist + * @ulist: ulist to add the element to + * @val: value to add to ulist + * @aux: auxiliary value to store along with val + * @gfp_mask: flags to use for allocation + * + * Note: locking must be provided by the caller. In case of rwlocks write + * locking is needed + * + * Add an element to a ulist. The @val will only be added if it doesn't + * already exist. If it is added, the auxiliary value @aux is stored along with + * it. 
In case @val already exists in the ulist, @aux is ignored, even if + * it differs from the already stored value. + * + * ulist_add returns 0 if @val already exists in ulist and 1 if @val has been + * inserted. + * In case of allocation failure -ENOMEM is returned and the ulist stays + * unaltered. + */ +int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, + unsigned long gfp_mask) +{ + int i; + + for (i = 0; i < ulist->nnodes; ++i) { + if (ulist->nodes[i].val == val) + return 0; + } + + if (ulist->nnodes >= ulist->nodes_alloced) { + u64 new_alloced = ulist->nodes_alloced + 128; + struct ulist_node *new_nodes; + void *old = NULL; + + /* + * if nodes_alloced == ULIST_SIZE no memory has been allocated + * yet, so pass NULL to krealloc + */ + if (ulist->nodes_alloced > ULIST_SIZE) + old = ulist->nodes; + + new_nodes = krealloc(old, sizeof(*new_nodes) * new_alloced, + gfp_mask); + if (!new_nodes) + return -ENOMEM; + + if (!old) + memcpy(new_nodes, ulist->int_nodes, + sizeof(ulist->int_nodes)); + + ulist->nodes = new_nodes; + ulist->nodes_alloced = new_alloced; + } + ulist->nodes[ulist->nnodes].val = val; + ulist->nodes[ulist->nnodes].aux = aux; + ++ulist->nnodes; + + return 1; +} +EXPORT_SYMBOL(ulist_add); + +/** + * ulist_next - iterate ulist + * @ulist: ulist to iterate + * @prev: previously returned element or %NULL to start iteration + * + * Note: locking must be provided by the caller. In case of rwlocks only read + * locking is needed + * + * This function is used to iterate an ulist. The iteration is started with + * @prev = %NULL. It returns the next element from the ulist or %NULL when the + * end is reached. No guarantee is made with respect to the order in which + * the elements are returned. They might neither be returned in order of + * addition nor in ascending order. + * It is allowed to call ulist_add during an enumeration. Newly added items + * are guaranteed to show up in the running enumeration. + */ +struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev) +{ + int next; + + if (ulist->nnodes == 0) + return NULL; + + if (!prev) + return &ulist->nodes[0]; + + next = (prev - ulist->nodes) + 1; + if (next < 0 || next >= ulist->nnodes) + return NULL; + + return &ulist->nodes[next]; +} +EXPORT_SYMBOL(ulist_next); diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h new file mode 100644 index 00000000000..2e25dec58ec --- /dev/null +++ b/fs/btrfs/ulist.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2011 STRATO AG + * written by Arne Jansen + * Distributed under the GNU GPL license version 2. + * + */ + +#ifndef __ULIST__ +#define __ULIST__ + +/* + * ulist is a generic data structure to hold a collection of unique u64 + * values. The only operations it supports is adding to the list and + * enumerating it. + * It is possible to store an auxiliary value along with the key. + * + * The implementation is preliminary and can probably be sped up + * significantly. A first step would be to store the values in an rbtree + * as soon as ULIST_SIZE is exceeded. + */ + +/* + * number of elements statically allocated inside struct ulist + */ +#define ULIST_SIZE 16 + +/* + * element of the list + */ +struct ulist_node { + u64 val; /* value to store */ + unsigned long aux; /* auxiliary value saved along with the val */ +}; + +struct ulist { + /* + * number of elements stored in list + */ + unsigned long nnodes; + + /* + * number of nodes we already have room for + */ + unsigned long nodes_alloced; + + /* + * pointer to the array storing the elements. 
The first ULIST_SIZE + * elements are stored inline. In this case the it points to int_nodes. + * After exceeding ULIST_SIZE, dynamic memory is allocated. + */ + struct ulist_node *nodes; + + /* + * inline storage space for the first ULIST_SIZE entries + */ + struct ulist_node int_nodes[ULIST_SIZE]; +}; + +void ulist_init(struct ulist *ulist); +void ulist_fini(struct ulist *ulist); +void ulist_reinit(struct ulist *ulist); +struct ulist *ulist_alloc(unsigned long gfp_mask); +void ulist_free(struct ulist *ulist); +int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, + unsigned long gfp_mask); +struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev); + +#endif -- cgit v1.2.3 From c7d22a3c3cdb73d8a0151e2ccc8cf4a48c48310b Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Tue, 22 Nov 2011 15:14:33 +0100 Subject: Btrfs: added helper btrfs_next_item() btrfs_next_item() makes the btrfs path point to the next item, crossing leaf boundaries if needed. Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 50634abef9b..3e4a07b7981 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2482,6 +2482,13 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, } int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); +static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) +{ + ++p->slots[0]; + if (p->slots[0] >= btrfs_header_nritems(p->nodes[0])) + return btrfs_next_leaf(root, p); + return 0; +} int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); void btrfs_drop_snapshot(struct btrfs_root *root, -- cgit v1.2.3 From 66d7e7f09f77456fe68683247d77721032a00ee5 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Mon, 12 Sep 2011 15:26:38 +0200 Subject: Btrfs: mark delayed refs as for cow Add a for_cow parameter to add_delayed_*_ref and pass the appropriate value from every call site. The for_cow parameter will later on be used to determine if a ref will change anything with respect to qgroups. Delayed refs coming from relocation are always counted as for_cow, as they don't change subvol quota. Also pass in the fs_info for later use. btrfs_find_all_roots() will use this as an optimization, as changes that are for_cow will not change anything with respect to which root points to a certain leaf. Thus, we don't need to add the current sequence number to those delayed refs. 
Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 42 ++++++++++---------- fs/btrfs/ctree.h | 17 +++++---- fs/btrfs/delayed-ref.c | 50 ++++++++++++++---------- fs/btrfs/delayed-ref.h | 15 +++++--- fs/btrfs/disk-io.c | 3 +- fs/btrfs/extent-tree.c | 101 +++++++++++++++++++++++++++++-------------------- fs/btrfs/file.c | 10 ++--- fs/btrfs/inode.c | 2 +- fs/btrfs/ioctl.c | 5 ++- fs/btrfs/relocation.c | 18 +++++---- fs/btrfs/transaction.c | 4 +- fs/btrfs/tree-log.c | 2 +- 12 files changed, 155 insertions(+), 114 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dede441bdee..0639a555e16 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -240,7 +240,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, cow = btrfs_alloc_free_block(trans, root, buf->len, 0, new_root_objectid, &disk_key, level, - buf->start, 0); + buf->start, 0, 1); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -261,9 +261,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(buf) > trans->transid); if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) - ret = btrfs_inc_ref(trans, root, cow, 1); + ret = btrfs_inc_ref(trans, root, cow, 1, 1); else - ret = btrfs_inc_ref(trans, root, cow, 0); + ret = btrfs_inc_ref(trans, root, cow, 0, 1); if (ret) return ret; @@ -350,14 +350,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if ((owner == root->root_key.objectid || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { - ret = btrfs_inc_ref(trans, root, buf, 1); + ret = btrfs_inc_ref(trans, root, buf, 1, 1); BUG_ON(ret); if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { - ret = btrfs_dec_ref(trans, root, buf, 0); + ret = btrfs_dec_ref(trans, root, buf, 0, 1); BUG_ON(ret); - ret = btrfs_inc_ref(trans, root, cow, 1); + ret = btrfs_inc_ref(trans, root, cow, 1, 1); BUG_ON(ret); } new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; @@ -365,9 +365,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) - ret = btrfs_inc_ref(trans, root, cow, 1); + ret = btrfs_inc_ref(trans, root, cow, 1, 1); else - ret = btrfs_inc_ref(trans, root, cow, 0); + ret = btrfs_inc_ref(trans, root, cow, 0, 1); BUG_ON(ret); } if (new_flags != 0) { @@ -381,11 +381,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) - ret = btrfs_inc_ref(trans, root, cow, 1); + ret = btrfs_inc_ref(trans, root, cow, 1, 1); else - ret = btrfs_inc_ref(trans, root, cow, 0); + ret = btrfs_inc_ref(trans, root, cow, 0, 1); BUG_ON(ret); - ret = btrfs_dec_ref(trans, root, buf, 1); + ret = btrfs_dec_ref(trans, root, buf, 1, 1); BUG_ON(ret); } clean_tree_block(trans, root, buf); @@ -446,7 +446,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, root->root_key.objectid, &disk_key, - level, search_start, empty_size); + level, search_start, empty_size, 1); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -484,7 +484,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, rcu_assign_pointer(root->node, cow); btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + last_ref, 1); free_extent_buffer(buf); add_root_to_dirty_list(root); } else { @@ -500,7 +500,7 @@ static noinline int __btrfs_cow_block(struct 
btrfs_trans_handle *trans, trans->transid); btrfs_mark_buffer_dirty(parent); btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + last_ref, 1); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -957,7 +957,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, free_extent_buffer(mid); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, root, mid, 0, 1, 0); /* once for the root ptr */ free_extent_buffer(mid); return 0; @@ -1015,7 +1015,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (wret) ret = wret; root_sub_used(root, right->len); - btrfs_free_tree_block(trans, root, right, 0, 1); + btrfs_free_tree_block(trans, root, right, 0, 1, 0); free_extent_buffer(right); right = NULL; } else { @@ -1055,7 +1055,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (wret) ret = wret; root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, root, mid, 0, 1, 0); free_extent_buffer(mid); mid = NULL; } else { @@ -2089,7 +2089,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, root->root_key.objectid, &lower_key, - level, root->node->start, 0); + level, root->node->start, 0, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -2216,7 +2216,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, root->root_key.objectid, - &disk_key, level, c->start, 0); + &disk_key, level, c->start, 0, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -2970,7 +2970,7 @@ again: right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, root->root_key.objectid, - &disk_key, 0, l->start, 0); + &disk_key, 0, l->start, 0, 0); if (IS_ERR(right)) return PTR_ERR(right); @@ -3781,7 +3781,7 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, root_sub_used(root, leaf->len); - btrfs_free_tree_block(trans, root, leaf, 0, 1); + btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); return 0; } /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3e4a07b7981..543f60bddb3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2277,11 +2277,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 parent, u64 root_objectid, struct btrfs_disk_key *key, int level, - u64 hint, u64 empty_size); + u64 hint, u64 empty_size, int for_cow); void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, - u64 parent, int last_ref); + u64 parent, int last_ref, int for_cow); struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u32 blocksize, @@ -2301,17 +2301,17 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 search_end, struct btrfs_key *ins, u64 data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref); + struct extent_buffer *buf, int full_backref, int for_cow); int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref); + struct extent_buffer *buf, int full_backref, int for_cow); int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 flags, int is_data); int btrfs_free_extent(struct 
btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset); + u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, + u64 owner, u64 offset, int for_cow); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, @@ -2323,7 +2323,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset); + u64 root_objectid, u64 owner, u64 offset, int for_cow); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); @@ -2492,7 +2492,8 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); void btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, int update_ref); + struct btrfs_block_rsv *block_rsv, int update_ref, + int for_reloc); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 125cf76fcd0..3a0f0ab804f 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -390,7 +390,8 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, * this does all the dirty work in terms of maintaining the correct * overall modification count. */ -static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans, +static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, int action, int is_data) @@ -468,10 +469,12 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans, /* * helper to insert a delayed tree ref into the rbtree. */ -static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans, +static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, u64 parent, - u64 ref_root, int level, int action) + u64 ref_root, int level, int action, + int for_cow) { struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_tree_ref *full_ref; @@ -522,11 +525,12 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans, /* * helper to insert a delayed data ref into the rbtree. */ -static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, +static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, u64 offset, - int action) + int action, int for_cow) { struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_data_ref *full_ref; @@ -554,6 +558,7 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, full_ref->root = ref_root; ref->type = BTRFS_EXTENT_DATA_REF_KEY; } + full_ref->objectid = owner; full_ref->offset = offset; @@ -580,10 +585,12 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, * to make sure the delayed ref is eventually processed before this * transaction commits. 
*/ -int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, +int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, int action, - struct btrfs_delayed_extent_op *extent_op) + struct btrfs_delayed_extent_op *extent_op, + int for_cow) { struct btrfs_delayed_tree_ref *ref; struct btrfs_delayed_ref_head *head_ref; @@ -610,12 +617,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, * insert both the head node and the new ref without dropping * the spin lock */ - ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes, - action, 0); + ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, + num_bytes, action, 0); BUG_ON(ret); - ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes, - parent, ref_root, level, action); + ret = add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, + num_bytes, parent, ref_root, level, action, + for_cow); BUG_ON(ret); spin_unlock(&delayed_refs->lock); return 0; @@ -624,11 +632,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, /* * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref. */ -int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, +int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, u64 offset, int action, - struct btrfs_delayed_extent_op *extent_op) + struct btrfs_delayed_extent_op *extent_op, + int for_cow) { struct btrfs_delayed_data_ref *ref; struct btrfs_delayed_ref_head *head_ref; @@ -655,18 +665,20 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, * insert both the head node and the new ref without dropping * the spin lock */ - ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes, - action, 1); + ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, + num_bytes, action, 1); BUG_ON(ret); - ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes, - parent, ref_root, owner, offset, action); + ret = add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, + num_bytes, parent, ref_root, owner, offset, + action, for_cow); BUG_ON(ret); spin_unlock(&delayed_refs->lock); return 0; } -int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, +int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, struct btrfs_delayed_extent_op *extent_op) { @@ -683,7 +695,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); - ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, + ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, num_bytes, BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data); BUG_ON(ret); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index e287e3b0eab..8316bff18d3 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -151,16 +151,21 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) } } -int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, +int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, int action, - struct btrfs_delayed_extent_op *extent_op); -int btrfs_add_delayed_data_ref(struct btrfs_trans_handle 
*trans, + struct btrfs_delayed_extent_op *extent_op, + int for_cow); +int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, u64 offset, int action, - struct btrfs_delayed_extent_op *extent_op); -int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, + struct btrfs_delayed_extent_op *extent_op, + int for_cow); +int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, struct btrfs_delayed_extent_op *extent_op); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 94abc25392f..6f8cd17c9a9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1243,7 +1243,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, root->ref_cows = 0; leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, - BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0); + BTRFS_TREE_LOG_OBJECTID, NULL, + 0, 0, 0, 0); if (IS_ERR(leaf)) { kfree(root); return ERR_CAST(leaf); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 813c6bb96c9..dc8b9a83459 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1872,20 +1872,24 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset) + u64 root_objectid, u64 owner, u64 offset, int for_cow) { int ret; + struct btrfs_fs_info *fs_info = root->fs_info; + BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && root_objectid == BTRFS_TREE_LOG_OBJECTID); if (owner < BTRFS_FIRST_FREE_OBJECTID) { - ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, + ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, + num_bytes, parent, root_objectid, (int)owner, - BTRFS_ADD_DELAYED_REF, NULL); + BTRFS_ADD_DELAYED_REF, NULL, for_cow); } else { - ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, + ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, + num_bytes, parent, root_objectid, owner, offset, - BTRFS_ADD_DELAYED_REF, NULL); + BTRFS_ADD_DELAYED_REF, NULL, for_cow); } return ret; } @@ -2405,7 +2409,8 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, extent_op->update_key = 0; extent_op->is_data = is_data ? 
1 : 0; - ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); + ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, + num_bytes, extent_op); if (ret) kfree(extent_op); return ret; @@ -2590,7 +2595,7 @@ out: static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, - int full_backref, int inc) + int full_backref, int inc, int for_cow) { u64 bytenr; u64 num_bytes; @@ -2603,7 +2608,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, int level; int ret = 0; int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, - u64, u64, u64, u64, u64, u64); + u64, u64, u64, u64, u64, u64, int); ref_root = btrfs_header_owner(buf); nritems = btrfs_header_nritems(buf); @@ -2640,14 +2645,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, key.offset -= btrfs_file_extent_offset(buf, fi); ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, key.objectid, - key.offset); + key.offset, for_cow); if (ret) goto fail; } else { bytenr = btrfs_node_blockptr(buf, i); num_bytes = btrfs_level_size(root, level - 1); ret = process_func(trans, root, bytenr, num_bytes, - parent, ref_root, level - 1, 0); + parent, ref_root, level - 1, 0, + for_cow); if (ret) goto fail; } @@ -2659,15 +2665,15 @@ fail: } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref) + struct extent_buffer *buf, int full_backref, int for_cow) { - return __btrfs_mod_ref(trans, root, buf, full_backref, 1); + return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow); } int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref) + struct extent_buffer *buf, int full_backref, int for_cow) { - return __btrfs_mod_ref(trans, root, buf, full_backref, 0); + return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow); } static int write_one_cache_group(struct btrfs_trans_handle *trans, @@ -4937,16 +4943,17 @@ out: void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, - u64 parent, int last_ref) + u64 parent, int last_ref, int for_cow) { struct btrfs_block_group_cache *cache = NULL; int ret; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { - ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, - parent, root->root_key.objectid, - btrfs_header_level(buf), - BTRFS_DROP_DELAYED_REF, NULL); + ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, + buf->start, buf->len, + parent, root->root_key.objectid, + btrfs_header_level(buf), + BTRFS_DROP_DELAYED_REF, NULL, for_cow); BUG_ON(ret); } @@ -4981,12 +4988,12 @@ out: btrfs_put_block_group(cache); } -int btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset) +int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, + u64 owner, u64 offset, int for_cow) { int ret; + struct btrfs_fs_info *fs_info = root->fs_info; /* * tree log blocks never actually go into the extent allocation @@ -4998,14 +5005,17 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, btrfs_pin_extent(root, bytenr, num_bytes, 1); ret = 0; } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { - ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, + ret = btrfs_add_delayed_tree_ref(fs_info, trans, 
bytenr, + num_bytes, parent, root_objectid, (int)owner, - BTRFS_DROP_DELAYED_REF, NULL); + BTRFS_DROP_DELAYED_REF, NULL, for_cow); BUG_ON(ret); } else { - ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, - parent, root_objectid, owner, - offset, BTRFS_DROP_DELAYED_REF, NULL); + ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, + num_bytes, + parent, root_objectid, owner, + offset, BTRFS_DROP_DELAYED_REF, + NULL, for_cow); BUG_ON(ret); } return ret; @@ -5826,9 +5836,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, - 0, root_objectid, owner, offset, - BTRFS_ADD_DELAYED_EXTENT, NULL); + ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid, + ins->offset, 0, + root_objectid, owner, offset, + BTRFS_ADD_DELAYED_EXTENT, NULL, 0); return ret; } @@ -5998,7 +6009,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 parent, u64 root_objectid, struct btrfs_disk_key *key, int level, - u64 hint, u64 empty_size) + u64 hint, u64 empty_size, int for_cow) { struct btrfs_key ins; struct btrfs_block_rsv *block_rsv; @@ -6042,10 +6053,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, extent_op->update_flags = 1; extent_op->is_data = 0; - ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, + ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, + ins.objectid, ins.offset, parent, root_objectid, level, BTRFS_ADD_DELAYED_EXTENT, - extent_op); + extent_op, for_cow); BUG_ON(ret); } return buf; @@ -6062,6 +6074,7 @@ struct walk_control { int keep_locks; int reada_slot; int reada_count; + int for_reloc; }; #define DROP_REFERENCE 1 @@ -6200,9 +6213,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, /* wc->stage == UPDATE_BACKREF */ if (!(wc->flags[level] & flag)) { BUG_ON(!path->locks[level]); - ret = btrfs_inc_ref(trans, root, eb, 1); + ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); BUG_ON(ret); - ret = btrfs_dec_ref(trans, root, eb, 0); + ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); BUG_ON(ret); ret = btrfs_set_disk_extent_flags(trans, root, eb->start, eb->len, flag, 0); @@ -6346,7 +6359,7 @@ skip: } ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, - root->root_key.objectid, level - 1, 0); + root->root_key.objectid, level - 1, 0, 0); BUG_ON(ret); } btrfs_tree_unlock(next); @@ -6420,9 +6433,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, if (wc->refs[level] == 1) { if (level == 0) { if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) - ret = btrfs_dec_ref(trans, root, eb, 1); + ret = btrfs_dec_ref(trans, root, eb, 1, + wc->for_reloc); else - ret = btrfs_dec_ref(trans, root, eb, 0); + ret = btrfs_dec_ref(trans, root, eb, 0, + wc->for_reloc); BUG_ON(ret); } /* make block locked assertion in clean_tree_block happy */ @@ -6449,7 +6464,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, btrfs_header_owner(path->nodes[level + 1])); } - btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); + btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0); out: wc->refs[level] = 0; wc->flags[level] = 0; @@ -6533,7 +6548,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, * blocks are properly updated. 
*/ void btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, int update_ref) + struct btrfs_block_rsv *block_rsv, int update_ref, + int for_reloc) { struct btrfs_path *path; struct btrfs_trans_handle *trans; @@ -6621,6 +6637,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root, wc->stage = DROP_REFERENCE; wc->update_ref = update_ref; wc->keep_locks = 0; + wc->for_reloc = for_reloc; wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); while (1) { @@ -6705,6 +6722,7 @@ out: * drop subtree rooted at tree block 'node'. * * NOTE: this function will unlock and release tree block 'node' + * only used by relocation code */ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -6749,6 +6767,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, wc->stage = DROP_REFERENCE; wc->update_ref = 0; wc->keep_locks = 1; + wc->for_reloc = 1; wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); while (1) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f2e92828960..d2b60ed6c33 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -678,7 +678,7 @@ next_slot: disk_bytenr, num_bytes, 0, root->root_key.objectid, new_key.objectid, - start - extent_offset); + start - extent_offset, 0); BUG_ON(ret); *hint_byte = disk_bytenr; } @@ -753,7 +753,7 @@ next_slot: disk_bytenr, num_bytes, 0, root->root_key.objectid, key.objectid, key.offset - - extent_offset); + extent_offset, 0); BUG_ON(ret); inode_sub_bytes(inode, extent_end - key.offset); @@ -962,7 +962,7 @@ again: ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset); + ino, orig_offset, 0); BUG_ON(ret); if (split == start) { @@ -989,7 +989,7 @@ again: del_nr++; ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset); + ino, orig_offset, 0); BUG_ON(ret); } other_start = 0; @@ -1006,7 +1006,7 @@ again: del_nr++; ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset); + ino, orig_offset, 0); BUG_ON(ret); } if (del_nr == 0) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c5ccec23984..ea819386b86 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3139,7 +3139,7 @@ delete: ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, 0, btrfs_header_owner(leaf), - ino, extent_offset); + ino, extent_offset, 0); BUG_ON(ret); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 72d461656f6..c48f2e931ea 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -358,7 +358,7 @@ static noinline int create_subvol(struct btrfs_root *root, return PTR_ERR(trans); leaf = btrfs_alloc_free_block(trans, root, root->leafsize, - 0, objectid, NULL, 0, 0, 0); + 0, objectid, NULL, 0, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); goto fail; @@ -2425,7 +2425,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, disko, diskl, 0, root->root_key.objectid, btrfs_ino(inode), - new_key.offset - datao); + new_key.offset - datao, + 0); BUG_ON(ret); } } else if (type == BTRFS_FILE_EXTENT_INLINE) { diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index dff29d5e151..8c1aae2c845 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1604,12 +1604,12 @@ int replace_file_extents(struct btrfs_trans_handle *trans, ret = btrfs_inc_extent_ref(trans, root, new_bytenr, num_bytes, parent, btrfs_header_owner(leaf), - key.objectid, key.offset); + key.objectid, key.offset, 1); BUG_ON(ret); ret = btrfs_free_extent(trans, root, 
bytenr, num_bytes, parent, btrfs_header_owner(leaf), - key.objectid, key.offset); + key.objectid, key.offset, 1); BUG_ON(ret); } if (dirty) @@ -1778,21 +1778,23 @@ again: ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, path->nodes[level]->start, - src->root_key.objectid, level - 1, 0); + src->root_key.objectid, level - 1, 0, + 1); BUG_ON(ret); ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, 0, dest->root_key.objectid, level - 1, - 0); + 0, 1); BUG_ON(ret); ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, path->nodes[level]->start, - src->root_key.objectid, level - 1, 0); + src->root_key.objectid, level - 1, 0, + 1); BUG_ON(ret); ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, 0, dest->root_key.objectid, level - 1, - 0); + 0, 1); BUG_ON(ret); btrfs_unlock_up_safe(path, 0); @@ -2244,7 +2246,7 @@ again: } else { list_del_init(&reloc_root->root_list); } - btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0); + btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); } if (found) { @@ -2558,7 +2560,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, node->eb->start, blocksize, upper->eb->start, btrfs_header_owner(upper->eb), - node->level, 0); + node->level, 0, 1); BUG_ON(ret); ret = btrfs_drop_subtree(trans, root, eb, upper->eb); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 81376d94cd3..a2bfedcbcab 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1393,9 +1393,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) if (btrfs_header_backref_rev(root->node) < BTRFS_MIXED_BACKREF_REV) - btrfs_drop_snapshot(root, NULL, 0); + btrfs_drop_snapshot(root, NULL, 0, 0); else - btrfs_drop_snapshot(root, NULL, 1); + btrfs_drop_snapshot(root, NULL, 1, 0); } return 0; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f4d81c06d48..fce7b9ec3bf 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -589,7 +589,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ret = btrfs_inc_extent_ref(trans, root, ins.objectid, ins.offset, 0, root->root_key.objectid, - key->objectid, offset); + key->objectid, offset, 0); BUG_ON(ret); } else { /* -- cgit v1.2.3 From eebe063b7f916087cd5c61de57b20a3a30894a96 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Wed, 14 Sep 2011 14:01:24 +0200 Subject: Btrfs: always save ref_root in delayed refs For consistent backref walking and (later) qgroup calculation the information to which root a delayed ref belongs is useful even for shared refs. 
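A brief illustration of the effect (hypothetical consumer, not part of this patch; the structure change itself is in the delayed-ref.h hunk below): with root and parent stored in separate fields instead of a union, the owning root can be read for shared and non-shared refs alike:

    /*
     * Hypothetical consumer: after this change ref->root is valid even
     * when ref->node.type is BTRFS_SHARED_BLOCK_REF_KEY, so backref
     * walking and qgroup accounting need no special case for shared refs.
     */
    static u64 example_delayed_ref_root(struct btrfs_delayed_tree_ref *ref)
    {
            return ref->root;
    }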
Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/delayed-ref.c | 18 ++++++++---------- fs/btrfs/delayed-ref.h | 12 ++++-------- 2 files changed, 12 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 3a0f0ab804f..babd37badb4 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -495,13 +495,12 @@ static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->in_tree = 1; full_ref = btrfs_delayed_node_to_tree_ref(ref); - if (parent) { - full_ref->parent = parent; + full_ref->parent = parent; + full_ref->root = ref_root; + if (parent) ref->type = BTRFS_SHARED_BLOCK_REF_KEY; - } else { - full_ref->root = ref_root; + else ref->type = BTRFS_TREE_BLOCK_REF_KEY; - } full_ref->level = level; trace_btrfs_delayed_tree_ref(ref, full_ref, action); @@ -551,13 +550,12 @@ static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info, ref->in_tree = 1; full_ref = btrfs_delayed_node_to_data_ref(ref); - if (parent) { - full_ref->parent = parent; + full_ref->parent = parent; + full_ref->root = ref_root; + if (parent) ref->type = BTRFS_SHARED_DATA_REF_KEY; - } else { - full_ref->root = ref_root; + else ref->type = BTRFS_EXTENT_DATA_REF_KEY; - } full_ref->objectid = owner; full_ref->offset = offset; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 8316bff18d3..a5fb2bc8373 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -98,19 +98,15 @@ struct btrfs_delayed_ref_head { struct btrfs_delayed_tree_ref { struct btrfs_delayed_ref_node node; - union { - u64 root; - u64 parent; - }; + u64 root; + u64 parent; int level; }; struct btrfs_delayed_data_ref { struct btrfs_delayed_ref_node node; - union { - u64 root; - u64 parent; - }; + u64 root; + u64 parent; u64 objectid; u64 offset; }; -- cgit v1.2.3 From 2aff57b0c052344e8401a8b4a33c2a1ecb0f627c Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 28 Dec 2011 12:02:13 -0500 Subject: ext4: allocate delalloc blocks before changing journal mode delalloc blocks should be allocated before changing journal mode, otherwise they can not be allocated and even more truncate on delalloc blocks could triggre BUG by flushing delalloc buffers. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 92655fd8965..cb0ba9d77a8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4647,6 +4647,17 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) return 0; if (is_journal_aborted(journal)) return -EROFS; + /* We have to allocate physical blocks for delalloc blocks + * before flushing journal. otherwise delalloc blocks can not + * be allocated any more. even more truncate on delalloc blocks + * could trigger BUG by flushing delalloc blocks in journal. + * There is no delalloc block in non-journal data mode. + */ + if (val && test_opt(inode->i_sb, DELALLOC)) { + err = ext4_alloc_da_blocks(inode); + if (err < 0) + return err; + } jbd2_journal_lock_updates(journal); jbd2_journal_flush(journal); -- cgit v1.2.3 From 5872ddaaf05bf25e3ab90580295ebc946405928c Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 28 Dec 2011 13:55:51 -0500 Subject: ext4: flush journal when switching from data=journal mode It's necessary to flush the journal when switching away from data=journal mode. 
This is because there are no revoke records when data blocks are journalled, but revoke records are required in the other journal modes. However, it is not necessary to flush the journal when switching into data=journal mode, and flushing the journal is expensive. So let's avoid it in that case. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cb0ba9d77a8..1254934de69 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4660,7 +4660,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) } jbd2_journal_lock_updates(journal); - jbd2_journal_flush(journal); /* * OK, there are no updates running now, and all cached data is @@ -4672,8 +4671,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) if (val) ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); - else + else { + jbd2_journal_flush(journal); ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); + } ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); -- cgit v1.2.3 From 1ba37268cd19e5a2a80924bfe8618bf1ba3e8249 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 28 Dec 2011 17:46:46 -0500 Subject: jbd2: clear revoked flag on buffers before a new transaction started Currently, we clear revoked flag only when a block is reused. However, this can tigger a false journal error. Consider a situation when a block is used as a meta block and is deleted(revoked) in ordered mode, then the block is allocated as a data block to a file. At this moment, user changes the file's journal mode from ordered to journaled and truncates the file. The block will be considered re-revoked by journal because it has revoked flag still pending from the last transaction and an assertion triggers. We fix the problem by keeping the revoked status more uptodate - we clear revoked flag when switching revoke tables to reflect there is no revoked buffers in current transaction any more. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 6 ++++++ fs/jbd2/revoke.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/jbd2.h | 1 + 3 files changed, 41 insertions(+) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 68d704db787..5069b847515 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -429,6 +429,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd_debug(3, "JBD2: commit phase 1\n"); + /* + * Clear revoked flag to reflect there is no revoked buffers + * in the next transaction which is going to be started. + */ + jbd2_clear_buffer_revoked_flags(journal); + /* * Switch to a new revoke table. */ diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 69fd9358811..30b2867d6cc 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -47,6 +47,10 @@ * overwriting the new data. We don't even need to clear the revoke * bit here. * + * We cache revoke status of a buffer in the current transaction in b_states + * bits. As the name says, revokevalid flag indicates that the cached revoke + * status of a buffer is valid and we can rely on the cached status. 
+ * * Revoke information on buffers is a tri-state value: * * RevokeValid clear: no cached revoke status, need to look it up @@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) return did_revoke; } +/* + * journal_clear_revoked_flag clears revoked flag of buffers in + * revoke table to reflect there is no revoked buffers in the next + * transaction which is going to be started. + */ +void jbd2_clear_buffer_revoked_flags(journal_t *journal) +{ + struct jbd2_revoke_table_s *revoke = journal->j_revoke; + int i = 0; + + for (i = 0; i < revoke->hash_size; i++) { + struct list_head *hash_list; + struct list_head *list_entry; + hash_list = &revoke->hash_table[i]; + + list_for_each(list_entry, hash_list) { + struct jbd2_revoke_record_s *record; + struct buffer_head *bh; + record = (struct jbd2_revoke_record_s *)list_entry; + bh = __find_get_block(journal->j_fs_dev, + record->blocknr, + journal->j_blocksize); + if (bh) { + clear_buffer_revoked(bh); + __brelse(bh); + } + } + } +} + /* journal_switch_revoke table select j_revoke for next transaction * we do not want to suspend any processing until all revokes are * written -bzzz diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 2092ea21e46..5557baefed6 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1151,6 +1151,7 @@ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t); extern void jbd2_journal_clear_revoke(journal_t *); extern void jbd2_journal_switch_revoke_table(journal_t *journal); +extern void jbd2_clear_buffer_revoked_flags(journal_t *journal); /* * The log thread user interface: -- cgit v1.2.3 From 88635ca277adb67db34e88281817d1ce10713553 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Wed, 28 Dec 2011 19:00:25 -0500 Subject: ext4: add missing spaces to debugging printk's Fix ext4_debug format in ext4_ext_handle_uninitialized_extents() and ext4_end_io_dio(). 
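As a brief aside (not part of the patch), the bug being fixed here is ordinary C string-literal concatenation: adjacent literals are joined with no separator, so a format string wrapped across lines without a trailing space runs two words together. A minimal standalone example:

#include <stdio.h>

int main(void)
{
	/* Missing space: prints "inode 1, logicalblock 42" */
	printf("inode %lu, logical"
	       "block %llu\n", 1UL, 42ULL);

	/* Space restored: prints "inode 1, logical block 42" */
	printf("inode %lu, logical "
	       "block %llu\n", 1UL, 42ULL);
	return 0;
}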
Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 4 ++-- fs/ext4/inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5684f251092..b35bb40556d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3456,8 +3456,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, int err = 0; ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; - ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" - "block %llu, max_blocks %u, flags %d, allocated %u", + ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " + "block %llu, max_blocks %u, flags %x, allocated %u\n", inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, flags, allocated); ext4_ext_show_leaf(inode, path); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1254934de69..ef9e8fdddfb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2760,7 +2760,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, if (!io_end || !size) goto out; - ext_debug("ext4_end_io_dio(): io_end 0x%p" + ext_debug("ext4_end_io_dio(): io_end 0x%p " "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", iocb->private, io_end->inode->i_ino, iocb, offset, size); -- cgit v1.2.3 From 14c83c9fddf2e75bdd0c20f1072f35260e356484 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 28 Dec 2011 20:25:13 -0500 Subject: ext4: avoid counting the number of free inodes twice in find_group_orlov() Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 8fb6844f973..cdafc05d79c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -358,7 +358,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t real_ngroups = ext4_get_groups_count(sb); int inodes_per_group = EXT4_INODES_PER_GROUP(sb); - unsigned int freei, avefreei; + unsigned int freei, avefreei, grp_free; ext4_fsblk_t freeb, avefreec; unsigned int ndirs; int max_dirs, min_inodes; @@ -477,8 +477,8 @@ fallback_retry: for (i = 0; i < ngroups; i++) { grp = (parent_group + i) % ngroups; desc = ext4_get_group_desc(sb, grp, NULL); - if (desc && ext4_free_inodes_count(sb, desc) && - ext4_free_inodes_count(sb, desc) >= avefreei) { + grp_free = ext4_free_inodes_count(sb, desc); + if (desc && grp_free && grp_free >= avefreei) { *group = grp; return 0; } -- cgit v1.2.3 From ccb4d7af914e0fe9b2f1022f8ea6c300463fd5e6 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Wed, 28 Dec 2011 20:25:40 -0500 Subject: ext4: remove no longer used functions in inode.c The functions ext4_block_truncate_page() and ext4_block_zero_page_range() are no longer used, so remove them. 
Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 4 -- fs/ext4/inode.c | 120 -------------------------------------------------------- 2 files changed, 124 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5b0e26a1272..ae2407f4502 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1880,10 +1880,6 @@ extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); -extern int ext4_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from); -extern int ext4_block_zero_page_range(handle_t *handle, - struct address_space *mapping, loff_t from, loff_t length); extern int ext4_discard_partial_page_buffers(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length, int flags); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ef9e8fdddfb..e6cc24dfa98 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3301,126 +3301,6 @@ next: return err; } -/* - * ext4_block_truncate_page() zeroes out a mapping from file offset `from' - * up to the end of the block which corresponds to `from'. - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ -int ext4_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from) -{ - unsigned offset = from & (PAGE_CACHE_SIZE-1); - unsigned length; - unsigned blocksize; - struct inode *inode = mapping->host; - - blocksize = inode->i_sb->s_blocksize; - length = blocksize - (offset & (blocksize - 1)); - - return ext4_block_zero_page_range(handle, mapping, from, length); -} - -/* - * ext4_block_zero_page_range() zeros out a mapping of length 'length' - * starting from file offset 'from'. The range to be zero'd must - * be contained with in one block. If the specified range exceeds - * the end of the block it will be shortened to end of the block - * that cooresponds to 'from' - */ -int ext4_block_zero_page_range(handle_t *handle, - struct address_space *mapping, loff_t from, loff_t length) -{ - ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); - unsigned blocksize, max, pos; - ext4_lblk_t iblock; - struct inode *inode = mapping->host; - struct buffer_head *bh; - struct page *page; - int err = 0; - - page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, - mapping_gfp_mask(mapping) & ~__GFP_FS); - if (!page) - return -ENOMEM; - - blocksize = inode->i_sb->s_blocksize; - max = blocksize - (offset & (blocksize - 1)); - - /* - * correct length if it does not fall between - * 'from' and the end of the block - */ - if (length > max || length < 0) - length = max; - - iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); - - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - - /* Find the buffer that contains "offset" */ - bh = page_buffers(page); - pos = blocksize; - while (offset >= pos) { - bh = bh->b_this_page; - iblock++; - pos += blocksize; - } - - err = 0; - if (buffer_freed(bh)) { - BUFFER_TRACE(bh, "freed: skip"); - goto unlock; - } - - if (!buffer_mapped(bh)) { - BUFFER_TRACE(bh, "unmapped"); - ext4_get_block(inode, iblock, bh, 0); - /* unmapped? It's a hole - nothing to do */ - if (!buffer_mapped(bh)) { - BUFFER_TRACE(bh, "still unmapped"); - goto unlock; - } - } - - /* Ok, it's mapped. 
Make sure it's up-to-date */ - if (PageUptodate(page)) - set_buffer_uptodate(bh); - - if (!buffer_uptodate(bh)) { - err = -EIO; - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - /* Uhhuh. Read error. Complain and punt. */ - if (!buffer_uptodate(bh)) - goto unlock; - } - - if (ext4_should_journal_data(inode)) { - BUFFER_TRACE(bh, "get write access"); - err = ext4_journal_get_write_access(handle, bh); - if (err) - goto unlock; - } - - zero_user(page, offset, length); - - BUFFER_TRACE(bh, "zeroed end of block"); - - err = 0; - if (ext4_should_journal_data(inode)) { - err = ext4_handle_dirty_metadata(handle, inode, bh); - } else - mark_buffer_dirty(bh); - -unlock: - unlock_page(page); - page_cache_release(page); - return err; -} - int ext4_can_truncate(struct inode *inode) { if (S_ISREG(inode->i_mode)) -- cgit v1.2.3 From 597d508c17a6dcd17770f4dd9da873d93cc15493 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 28 Dec 2011 20:32:07 -0500 Subject: ext4: use proper little-endian bitops ext4_{set,clear}_bit() is defined as __test_and_{set,clear}_bit_le() for ext4. Only two ext4_{set,clear}_bit() calls check the return value. The rest of calls ignore the return value and they can be replaced with __{set,clear}_bit_le(). This changes ext4_{set,clear}_bit() from __test_and_{set,clear}_bit_le() to __{set,clear}_bit_le() and introduces ext4_test_and_{set,clear}_bit() for the two places where old bit needs to be returned. This ext4_{set,clear}_bit() change is considered safe, because if someone uses these macros without noticing the change, new ext4_{set,clear}_bit don't have return value and causes compiler errors where the return value is used. This also removes unused ext4_find_first_zero_bit(). Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 7 ++++--- fs/ext4/ialloc.c | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ae2407f4502..0e43bba049a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -957,12 +957,13 @@ struct ext4_inode_info { #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ EXT4_MOUNT2_##opt) -#define ext4_set_bit __test_and_set_bit_le +#define ext4_test_and_set_bit __test_and_set_bit_le +#define ext4_set_bit __set_bit_le #define ext4_set_bit_atomic ext2_set_bit_atomic -#define ext4_clear_bit __test_and_clear_bit_le +#define ext4_test_and_clear_bit __test_and_clear_bit_le +#define ext4_clear_bit __clear_bit_le #define ext4_clear_bit_atomic ext2_clear_bit_atomic #define ext4_test_bit test_bit_le -#define ext4_find_first_zero_bit find_first_zero_bit_le #define ext4_find_next_zero_bit find_next_zero_bit_le #define ext4_find_next_bit find_next_bit_le diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index cdafc05d79c..72fc9892231 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -252,7 +252,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) fatal = ext4_journal_get_write_access(handle, bh2); } ext4_lock_group(sb, block_group); - cleared = ext4_clear_bit(bit, bitmap_bh->b_data); + cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data); if (fatal || !cleared) { ext4_unlock_group(sb, block_group); goto out; @@ -618,7 +618,7 @@ static int ext4_claim_inode(struct super_block *sb, */ down_read(&grp->alloc_sem); ext4_lock_group(sb, group); - if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { + if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) { /* not a free inode */ retval = 1; goto err_ret; -- cgit v1.2.3 From 
e552a596687bf0e1802c744a7bb113afbd2bf4d4 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 29 Dec 2011 03:50:20 +0000 Subject: Squashfs: add missing block release on error condition squashfs_read_metadata forgets to release the cache block if an error has occurred. Signed-off-by: Phillip Lougher --- fs/squashfs/cache.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index f744be98cd5..ea6e798e548 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -332,17 +332,20 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, u64 *block, int *offset, int length) { struct squashfs_sb_info *msblk = sb->s_fs_info; - int bytes, copied = length; + int bytes, res = length; struct squashfs_cache_entry *entry; TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); while (length) { entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); - if (entry->error) - return entry->error; - else if (*offset >= entry->length) - return -EIO; + if (entry->error) { + res = entry->error; + goto error; + } else if (*offset >= entry->length) { + res = -EIO; + goto error; + } bytes = squashfs_copy_data(buffer, entry, *offset, length); if (buffer) @@ -358,7 +361,11 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, squashfs_cache_put(entry); } - return copied; + return res; + +error: + squashfs_cache_put(entry); + return res; } -- cgit v1.2.3 From d7fbd893388d9e86d29b7cfbd5457bcf03496fbf Mon Sep 17 00:00:00 2001 From: Ajeet Yadav Date: Tue, 27 Dec 2011 15:10:04 +0530 Subject: Squashfs: optimise squashfs_cache_get entry search squashfs_cache_get() iterates over all entries to search for block its looking for. Often get() / put() are called for same block. If we cache the current entry index, then we can optimise the subsequent *_get() calls. Signed-off-by: Ajeet Yadav Signed-off-by: Phillip Lougher --- fs/squashfs/cache.c | 11 ++++++++--- fs/squashfs/squashfs_fs_sb.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index ea6e798e548..af0b7380259 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -70,11 +70,15 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, spin_lock(&cache->lock); while (1) { - for (i = 0; i < cache->entries; i++) - if (cache->entry[i].block == block) + for (i = cache->curr_blk, n = 0; n < cache->entries; n++) { + if (cache->entry[i].block == block) { + cache->curr_blk = i; break; + } + i = (i + 1) % cache->entries; + } - if (i == cache->entries) { + if (n == cache->entries) { /* * Block not in cache, if all cache entries are used * go to sleep waiting for one to become available. 
@@ -245,6 +249,7 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, goto cleanup; } + cache->curr_blk = 0; cache->next_blk = 0; cache->unused = entries; cache->entries = entries; diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 651f0b31d29..52934a22f29 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -28,6 +28,7 @@ struct squashfs_cache { char *name; int entries; + int curr_blk; int next_blk; int num_waiters; int unused; -- cgit v1.2.3 From e97a5d893fdf45c20799b72a1c11dca3b282c89c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 31 Dec 2011 16:54:46 -0200 Subject: [media] fs/compat_ioctl: it needs to see the DVBv3 compat stuff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the ioctl core should see the DVBv3 compat stuff, as its contents are not available anymore to the drivers. As fs/compat_ioctl also handles DVBv3 ioctl's, it needs those definitions: fs/compat_ioctl.c:1345: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1345: error: array type has incomplete element type fs/compat_ioctl.c:1345: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1345: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1345: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1345: error: array type has incomplete element type fs/compat_ioctl.c:1345: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1345: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1345: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1345: error: array type has incomplete element type fs/compat_ioctl.c:1345: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1345: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1345: error: initializer element is not constant fs/compat_ioctl.c:1345: error: (near initialization for ‘ioctl_pointer[462]’) fs/compat_ioctl.c:1346: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1346: error: array type has incomplete element type fs/compat_ioctl.c:1346: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1346: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1346: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1346: error: array type has incomplete element type fs/compat_ioctl.c:1346: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1346: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1346: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1346: error: array type has incomplete element type fs/compat_ioctl.c:1346: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1346: 
error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_parameters’ fs/compat_ioctl.c:1346: error: initializer element is not constant fs/compat_ioctl.c:1346: error: (near initialization for ‘ioctl_pointer[463]’) fs/compat_ioctl.c:1347: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_event’ fs/compat_ioctl.c:1347: error: array type has incomplete element type fs/compat_ioctl.c:1347: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_event’ fs/compat_ioctl.c:1347: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_event’ fs/compat_ioctl.c:1347: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_event’ fs/compat_ioctl.c:1347: error: array type has incomplete element type fs/compat_ioctl.c:1347: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_event’ fs/compat_ioctl.c:1347: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_event’ fs/compat_ioctl.c:1347: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_event’ fs/compat_ioctl.c:1347: error: array type has incomplete element type fs/compat_ioctl.c:1347: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_event’ fs/compat_ioctl.c:1347: error: invalid application of ‘sizeof’ to incomplete type ‘struct dvb_frontend_event’ fs/compat_ioctl.c:1347: error: initializer element is not constant fs/compat_ioctl.c:1347: error: (near initialization for ‘ioctl_pointer[464]’) Reported-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab --- fs/compat_ioctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 51352de88ef..32200c2ca7f 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -105,6 +105,7 @@ #include +#define __DVB_CORE__ #include #include #include -- cgit v1.2.3 From cc37f75a9ffbbfcb1c3297534f293c8284e3c5a6 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 2 Jan 2012 17:47:14 +0000 Subject: Squashfs: fix mount time sanity check for corrupted superblock A Squashfs filesystem containing nothing but an empty directory, although unusual and ultimately pointless, is still valid. The directory_table >= next_table sanity check rejects these filesystems as invalid because the directory_table is empty and equal to next_table. Signed-off-by: Phillip Lougher --- fs/squashfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 2da1715452a..4619247d74e 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -290,7 +290,7 @@ handle_fragments: check_directory_table: /* Sanity check directory_table */ - if (msblk->directory_table >= next_table) { + if (msblk->directory_table > next_table) { err = -EINVAL; goto failed_mount; } -- cgit v1.2.3 From aec39680b02ed55fcbb2bc87ad96eba16a651546 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jan 2012 17:30:05 -0500 Subject: nfsd4: fix spurious 4.1 post-reboot failures In the NFSv4.1 case, this could cause a spurious "NFSD: failed to write recovery record (err -17); please check that /var/lib/nfs/v4recovery exists and is writable. Signed-off-by: J. 
Bruce Fields Reported-by: Steve Dickson --- fs/nfsd/nfs4recover.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index ed083b9a731..28712e20bb1 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -144,8 +144,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) status = PTR_ERR(dentry); goto out_unlock; } - status = -EEXIST; if (dentry->d_inode) + /* + * In the 4.1 case, where we're called from + * reclaim_complete(), records from the previous reboot + * may still be left, so this is OK. + * + * In the 4.0 case, we should never get here; but we may + * as well be forgiving and just succeed silently. + */ goto out_put; status = mnt_want_write(rec_file->f_path.mnt); if (status) -- cgit v1.2.3 From 3d4a1c80c4eb97187b3a61b3bfa8c804327f7a45 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 3 Jan 2012 02:58:13 +0000 Subject: Squashfs: fix i_blocks calculation with extended regular files The le64_to_cpu() forces the calculation to be unsigned, with the effect that it can underflow leading to an incorrect large value. This bug only triggers in rare(ish) circumstances, an empty file encoded as an extended regular file or a completely sparse file. Normally empty files are encoded as a regular file rather than as an extended regular file (and the regular file i_blocks calculation doesn't have this bug). To save space regular file inodes are optimised to encode the most commonly occurring files. Less common regular files are encoded using extended regular file inodes which contain extra information. Empty files with nlinks greater than 1, and or empty files with extended attributes are encoded using extended regular file inodes and they will hit this bug. Signed-off-by: Phillip Lougher --- fs/squashfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index fd7b3b3bda1..81afbccfa84 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -208,8 +208,8 @@ int squashfs_read_inode(struct inode *inode, long long ino) inode->i_op = &squashfs_inode_ops; inode->i_fop = &generic_ro_fops; inode->i_mode |= S_IFREG; - inode->i_blocks = ((inode->i_size - - le64_to_cpu(sqsh_ino->sparse) - 1) >> 9) + 1; + inode->i_blocks = (inode->i_size - + le64_to_cpu(sqsh_ino->sparse) + 511) >> 9; squashfs_i(inode)->fragment_block = frag_blk; squashfs_i(inode)->fragment_size = frag_size; -- cgit v1.2.3 From b1c770c273a4787069306fc82aab245e9ac72e9d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 21 Dec 2011 00:07:42 +0000 Subject: xfs: fix endian conversion issue in discard code When finding the longest extent in an AG, we read the value directly out of the AGF buffer without endian conversion. This will give an incorrect length, resulting in FITRIM operations potentially not trimming everything that it should. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_discard.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 8a24f0c6c86..286a051f12c 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -68,7 +68,7 @@ xfs_trim_extents( * Look up the longest btree in the AGF and start with it. 
*/ error = xfs_alloc_lookup_le(cur, 0, - XFS_BUF_TO_AGF(agbp)->agf_longest, &i); + be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i); if (error) goto out_del_cursor; @@ -84,7 +84,7 @@ xfs_trim_extents( if (error) goto out_del_cursor; XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); - ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); + ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); /* * Too small? Give up. -- cgit v1.2.3 From 18e3143848f1abdd07e7d9879cf67f4e147ff8b7 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 Jan 2012 23:18:50 -0500 Subject: ext4: add a function which extends a group without checking parameters This patch added a function named ext4_group_extend_no_check() whose code is copied from ext4_group_extend(). ext4_group_extend_no_check() assumes the parameter is valid and has been checked by caller. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 996780ab4f4..1c3c2b56049 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -968,6 +968,57 @@ exit_put: return err; } /* ext4_group_add */ +/* + * extend a group without checking assuming that checking has been done. + */ +static int ext4_group_extend_no_check(struct super_block *sb, + ext4_fsblk_t o_blocks_count, ext4_grpblk_t add) +{ + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + handle_t *handle; + int err = 0, err2; + + /* We will update the superblock, one block bitmap, and + * one group descriptor via ext4_group_add_blocks(). + */ + handle = ext4_journal_start_sb(sb, 3); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + ext4_warning(sb, "error %d on journal start", err); + return err; + } + + err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + if (err) { + ext4_warning(sb, "error %d on journal write access", err); + goto errout; + } + + ext4_blocks_count_set(es, o_blocks_count + add); + ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, + o_blocks_count + add); + /* We add the blocks to the bitmap and set the group need init bit */ + err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); + if (err) + goto errout; + ext4_handle_dirty_super(handle, sb); + ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, + o_blocks_count + add); +errout: + err2 = ext4_journal_stop(handle); + if (err2 && !err) + err = err2; + + if (!err) { + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG "EXT4-fs: extended group to %llu " + "blocks\n", ext4_blocks_count(es)); + update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, + sizeof(struct ext4_super_block)); + } + return err; +} + /* * Extend the filesystem to the new number of blocks specified. This entry * point is only used to extend the current filesystem to the end of the last -- cgit v1.2.3 From bb08c1e7d8c072da338f6d905a89376b36023017 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 Jan 2012 23:20:50 -0500 Subject: ext4: add a function which adds a new group descriptors to a fs This patch adds a function named ext4_add_new_descs() which adds one or more new group descriptors to a fs and whose code is copied from ext4_group_add(). The function will be used by new resize implementation. 
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 1c3c2b56049..3bb4e7b502e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -735,6 +735,52 @@ exit_err: } } +/* + * ext4_add_new_descs() adds @count group descriptor of groups + * starting at @group + * + * @handle: journal handle + * @sb: super block + * @group: the group no. of the first group desc to be added + * @resize_inode: the resize inode + * @count: number of group descriptors to be added + */ +static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, + ext4_group_t group, struct inode *resize_inode, + ext4_group_t count) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + struct buffer_head *gdb_bh; + int i, gdb_off, gdb_num, err = 0; + + for (i = 0; i < count; i++, group++) { + int reserved_gdb = ext4_bg_has_super(sb, group) ? + le16_to_cpu(es->s_reserved_gdt_blocks) : 0; + + gdb_off = group % EXT4_DESC_PER_BLOCK(sb); + gdb_num = group / EXT4_DESC_PER_BLOCK(sb); + + /* + * We will only either add reserved group blocks to a backup group + * or remove reserved blocks for the first group in a new group block. + * Doing both would be mean more complex code, and sane people don't + * use non-sparse filesystems anymore. This is already checked above. + */ + if (gdb_off) { + gdb_bh = sbi->s_group_desc[gdb_num]; + err = ext4_journal_get_write_access(handle, gdb_bh); + + if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) + err = reserve_backup_gdb(handle, resize_inode, group); + } else + err = add_new_gdb(handle, resize_inode, group); + if (err) + break; + } + return err; +} + /* Add group descriptor data to an existing or new group descriptor block. * Ensure we handle all possible error conditions _before_ we start modifying * the filesystem, because we cannot abort the transaction and not have it -- cgit v1.2.3 From 28c7bac0091687e6116ebd6c179e154ae4053c90 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 Jan 2012 23:22:50 -0500 Subject: ext4: add a structure which will be used by 64bit-resize interface This patch adds a structure which will be used by 64bit-resize interface. Two functions which allocate and destroy the structure respectively are added. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3bb4e7b502e..6076d5e4b51 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -134,6 +134,61 @@ static int verify_group_input(struct super_block *sb, return err; } +/* + * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex + * group each time. + */ +struct ext4_new_flex_group_data { + struct ext4_new_group_data *groups; /* new_group_data for groups + in the flex group */ + __u16 *bg_flags; /* block group flags of groups + in @groups */ + ext4_group_t count; /* number of groups in @groups + */ +}; + +/* + * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of + * @flexbg_size. + * + * Returns NULL on failure otherwise address of the allocated structure. 
+ */ +static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) +{ + struct ext4_new_flex_group_data *flex_gd; + + flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS); + if (flex_gd == NULL) + goto out3; + + flex_gd->count = flexbg_size; + + flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * + flexbg_size, GFP_NOFS); + if (flex_gd->groups == NULL) + goto out2; + + flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS); + if (flex_gd->bg_flags == NULL) + goto out1; + + return flex_gd; + +out1: + kfree(flex_gd->groups); +out2: + kfree(flex_gd); +out3: + return NULL; +} + +static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) +{ + kfree(flex_gd->bg_flags); + kfree(flex_gd->groups); + kfree(flex_gd); +} + static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, ext4_fsblk_t blk) { -- cgit v1.2.3 From 33afdcc5402d0abf70ef2dfb96d0b901d20bcc37 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 Jan 2012 23:32:52 -0500 Subject: ext4: add a function which sets up group blocks of a flex bg This patch adds a function named setup_new_flex_group_blocks() which sets up group blocks of a flex bg. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 8 ++ fs/ext4/resize.c | 250 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 258 insertions(+) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0e43bba049a..05058e2b7f4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -511,6 +511,14 @@ struct ext4_new_group_data { __u32 free_blocks_count; }; +/* Indexes used to index group tables in ext4_new_group_data */ +enum { + BLOCK_BITMAP = 0, /* block bitmap */ + INODE_BITMAP, /* inode bitmap */ + INODE_TABLE, /* inode tables */ + GROUP_TABLE_COUNT, +}; + /* * Flags used by ext4_map_blocks() */ diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6076d5e4b51..e8ccb2f8f45 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -233,6 +233,256 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh) return 0; } +/* + * set_flexbg_block_bitmap() mark @count blocks starting from @block used. + * + * Helper function for ext4_setup_new_group_blocks() which set . 
+ * + * @sb: super block + * @handle: journal handle + * @flex_gd: flex group data + */ +static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, + struct ext4_new_flex_group_data *flex_gd, + ext4_fsblk_t block, ext4_group_t count) +{ + ext4_group_t count2; + + ext4_debug("mark blocks [%llu/%u] used\n", block, count); + for (count2 = count; count > 0; count -= count2, block += count2) { + ext4_fsblk_t start; + struct buffer_head *bh; + ext4_group_t group; + int err; + + ext4_get_group_no_and_offset(sb, block, &group, NULL); + start = ext4_group_first_block_no(sb, group); + group -= flex_gd->groups[0].group; + + count2 = sb->s_blocksize * 8 - (block - start); + if (count2 > count) + count2 = count; + + if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) { + BUG_ON(flex_gd->count > 1); + continue; + } + + err = extend_or_restart_transaction(handle, 1); + if (err) + return err; + + bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap); + if (!bh) + return -EIO; + + err = ext4_journal_get_write_access(handle, bh); + if (err) + return err; + ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, + block - start, count2); + ext4_set_bits(bh->b_data, block - start, count2); + + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (unlikely(err)) + return err; + brelse(bh); + } + + return 0; +} + +/* + * Set up the block and inode bitmaps, and the inode table for the new groups. + * This doesn't need to be part of the main transaction, since we are only + * changing blocks outside the actual filesystem. We still do journaling to + * ensure the recovery is correct in case of a failure just after resize. + * If any part of this fails, we simply abort the resize. + * + * setup_new_flex_group_blocks handles a flex group as follow: + * 1. copy super block and GDT, and initialize group tables if necessary. + * In this step, we only set bits in blocks bitmaps for blocks taken by + * super block and GDT. + * 2. allocate group tables in block bitmaps, that is, set bits in block + * bitmap for blocks taken by group tables. 
+ */ +static int setup_new_flex_group_blocks(struct super_block *sb, + struct ext4_new_flex_group_data *flex_gd) +{ + int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group}; + ext4_fsblk_t start; + ext4_fsblk_t block; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + struct ext4_new_group_data *group_data = flex_gd->groups; + __u16 *bg_flags = flex_gd->bg_flags; + handle_t *handle; + ext4_group_t group, count; + struct buffer_head *bh = NULL; + int reserved_gdb, i, j, err = 0, err2; + + BUG_ON(!flex_gd->count || !group_data || + group_data[0].group != sbi->s_groups_count); + + reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); + + /* This transaction may be extended/restarted along the way */ + handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + group = group_data[0].group; + for (i = 0; i < flex_gd->count; i++, group++) { + unsigned long gdblocks; + + gdblocks = ext4_bg_num_gdb(sb, group); + start = ext4_group_first_block_no(sb, group); + + /* Copy all of the GDT blocks into the backup in this group */ + for (j = 0, block = start + 1; j < gdblocks; j++, block++) { + struct buffer_head *gdb; + + ext4_debug("update backup group %#04llx\n", block); + err = extend_or_restart_transaction(handle, 1); + if (err) + goto out; + + gdb = sb_getblk(sb, block); + if (!gdb) { + err = -EIO; + goto out; + } + + err = ext4_journal_get_write_access(handle, gdb); + if (err) { + brelse(gdb); + goto out; + } + memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, + gdb->b_size); + set_buffer_uptodate(gdb); + + err = ext4_handle_dirty_metadata(handle, NULL, gdb); + if (unlikely(err)) { + brelse(gdb); + goto out; + } + brelse(gdb); + } + + /* Zero out all of the reserved backup group descriptor + * table blocks + */ + if (ext4_bg_has_super(sb, group)) { + err = sb_issue_zeroout(sb, gdblocks + start + 1, + reserved_gdb, GFP_NOFS); + if (err) + goto out; + } + + /* Initialize group tables of the grop @group */ + if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) + goto handle_bb; + + /* Zero out all of the inode table blocks */ + block = group_data[i].inode_table; + ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", + block, sbi->s_itb_per_group); + err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, + GFP_NOFS); + if (err) + goto out; + +handle_bb: + if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT) + goto handle_ib; + + /* Initialize block bitmap of the @group */ + block = group_data[i].block_bitmap; + err = extend_or_restart_transaction(handle, 1); + if (err) + goto out; + + bh = bclean(handle, sb, block); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + goto out; + } + if (ext4_bg_has_super(sb, group)) { + ext4_debug("mark backup superblock %#04llx (+0)\n", + start); + ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + + 1); + } + ext4_mark_bitmap_end(group_data[i].blocks_count, + sb->s_blocksize * 8, bh->b_data); + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (err) + goto out; + brelse(bh); + +handle_ib: + if (bg_flags[i] & EXT4_BG_INODE_UNINIT) + continue; + + /* Initialize inode bitmap of the @group */ + block = group_data[i].inode_bitmap; + err = extend_or_restart_transaction(handle, 1); + if (err) + goto out; + /* Mark unused entries in inode bitmap used */ + bh = bclean(handle, sb, block); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + goto out; + } + + ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), + sb->s_blocksize * 8, bh->b_data); + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (err) + 
goto out; + brelse(bh); + } + bh = NULL; + + /* Mark group tables in block bitmap */ + for (j = 0; j < GROUP_TABLE_COUNT; j++) { + count = group_table_count[j]; + start = (&group_data[0].block_bitmap)[j]; + block = start; + for (i = 1; i < flex_gd->count; i++) { + block += group_table_count[j]; + if (block == (&group_data[i].block_bitmap)[j]) { + count += group_table_count[j]; + continue; + } + err = set_flexbg_block_bitmap(sb, handle, + flex_gd, start, count); + if (err) + goto out; + count = group_table_count[j]; + start = group_data[i].block_bitmap; + block = start; + } + + if (count) { + err = set_flexbg_block_bitmap(sb, handle, + flex_gd, start, count); + if (err) + goto out; + } + } + +out: + brelse(bh); + err2 = ext4_journal_stop(handle); + if (err2 && !err) + err = err2; + + return err; +} + /* * Set up the block and inode bitmaps, and the inode table for the new group. * This doesn't need to be part of the main transaction, since we are only -- cgit v1.2.3 From 083f5b24cc55448e0602a807a5c2872e1f3796e2 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 Jan 2012 23:37:31 -0500 Subject: ext4: add a function which sets up a block group descriptors of a flex bg This patch adds a function named ext4_setup_new_descs which sets up the block group descriptors of a flex bg. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e8ccb2f8f45..098bdb8e97c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1086,6 +1086,62 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, return err; } +/* + * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg + */ +static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, + struct ext4_new_flex_group_data *flex_gd) +{ + struct ext4_new_group_data *group_data = flex_gd->groups; + struct ext4_group_desc *gdp; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *gdb_bh; + ext4_group_t group; + __u16 *bg_flags = flex_gd->bg_flags; + int i, gdb_off, gdb_num, err = 0; + + + for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) { + group = group_data->group; + + gdb_off = group % EXT4_DESC_PER_BLOCK(sb); + gdb_num = group / EXT4_DESC_PER_BLOCK(sb); + + /* + * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). + */ + gdb_bh = sbi->s_group_desc[gdb_num]; + /* Update group descriptor block for new group */ + gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data + + gdb_off * EXT4_DESC_SIZE(sb)); + + memset(gdp, 0, EXT4_DESC_SIZE(sb)); + ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); + ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); + ext4_inode_table_set(sb, gdp, group_data->inode_table); + ext4_free_group_clusters_set(sb, gdp, + EXT4_B2C(sbi, group_data->free_blocks_count)); + ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); + gdp->bg_flags = cpu_to_le16(*bg_flags); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); + + err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); + if (unlikely(err)) { + ext4_std_error(sb, err); + break; + } + + /* + * We can allocate memory for mb_alloc based on the new group + * descriptor + */ + err = ext4_mb_add_groupinfo(sb, group, gdp); + if (err) + break; + } + return err; +} + /* Add group descriptor data to an existing or new group descriptor block. 
* Ensure we handle all possible error conditions _before_ we start modifying * the filesystem, because we cannot abort the transaction and not have it -- cgit v1.2.3 From 2e10e2f2e5a800a54ad2f16dfdd8c034e005958b Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 Jan 2012 23:41:39 -0500 Subject: ext4: add a function which updates the super block during online resizing This patch adds a function named ext4_update_super() which updates super block so the newly created block groups are visible to the file system. This code is copied from ext4_group_add(). The function will be used by new resize implementation. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 098bdb8e97c..eb0aebcca55 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1142,6 +1142,100 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, return err; } +/* + * ext4_update_super() updates the super block so that the newly added + * groups can be seen by the filesystem. + * + * @sb: super block + * @flex_gd: new added groups + */ +static void ext4_update_super(struct super_block *sb, + struct ext4_new_flex_group_data *flex_gd) +{ + ext4_fsblk_t blocks_count = 0; + ext4_fsblk_t free_blocks = 0; + ext4_fsblk_t reserved_blocks = 0; + struct ext4_new_group_data *group_data = flex_gd->groups; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + int i; + + BUG_ON(flex_gd->count == 0 || group_data == NULL); + /* + * Make the new blocks and inodes valid next. We do this before + * increasing the group count so that once the group is enabled, + * all of its blocks and inodes are already valid. + * + * We always allocate group-by-group, then block-by-block or + * inode-by-inode within a group, so enabling these + * blocks/inodes before the group is live won't actually let us + * allocate the new space yet. + */ + for (i = 0; i < flex_gd->count; i++) { + blocks_count += group_data[i].blocks_count; + free_blocks += group_data[i].free_blocks_count; + } + + reserved_blocks = ext4_r_blocks_count(es) * 100; + do_div(reserved_blocks, ext4_blocks_count(es)); + reserved_blocks *= blocks_count; + do_div(reserved_blocks, 100); + + ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); + le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * + flex_gd->count); + + /* + * We need to protect s_groups_count against other CPUs seeing + * inconsistent state in the superblock. + * + * The precise rules we use are: + * + * * Writers must perform a smp_wmb() after updating all + * dependent data and before modifying the groups count + * + * * Readers must perform an smp_rmb() after reading the groups + * count and before reading any dependent data. + * + * NB. These rules can be relaxed when checking the group count + * while freeing data, as we can only allocate from a block + * group after serialising against the group count, and we can + * only then free after serialising in turn against that + * allocation. + */ + smp_wmb(); + + /* Update the global fs size fields */ + sbi->s_groups_count += flex_gd->count; + + /* Update the reserved block counts only once the new group is + * active. 
*/ + ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + + reserved_blocks); + + /* Update the free space counts */ + percpu_counter_add(&sbi->s_freeclusters_counter, + EXT4_B2C(sbi, free_blocks)); + percpu_counter_add(&sbi->s_freeinodes_counter, + EXT4_INODES_PER_GROUP(sb) * flex_gd->count); + + if (EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_FLEX_BG) && + sbi->s_log_groups_per_flex) { + ext4_group_t flex_group; + flex_group = ext4_flex_group(sbi, group_data[0].group); + atomic_add(EXT4_B2C(sbi, free_blocks), + &sbi->s_flex_groups[flex_group].free_clusters); + atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, + &sbi->s_flex_groups[flex_group].free_inodes); + } + + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG "EXT4-fs: added group %u:" + "%llu blocks(%llu free %llu reserved)\n", flex_gd->count, + blocks_count, free_blocks, reserved_blocks); +} + /* Add group descriptor data to an existing or new group descriptor block. * Ensure we handle all possible error conditions _before_ we start modifying * the filesystem, because we cannot abort the transaction and not have it -- cgit v1.2.3 From c72df9f928efd5b17e84bdb7b8ec1be3b9c1ea9d Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 Jan 2012 23:43:39 -0500 Subject: ext4: pass verify_reserved_gdb() the number of group decriptors The 64bit resizer adds a flex group each time, so verify_reserved_gdb can not use s_groups_count directly, it should use the number of group decriptors before the added group. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index eb0aebcca55..12eace09654 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -656,10 +656,10 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, * groups in current filesystem that have BACKUPS, or -ve error code. */ static int verify_reserved_gdb(struct super_block *sb, + ext4_group_t end, struct buffer_head *primary) { const ext4_fsblk_t blk = primary->b_blocknr; - const ext4_group_t end = EXT4_SB(sb)->s_groups_count; unsigned three = 1; unsigned five = 5; unsigned seven = 7; @@ -734,7 +734,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, if (!gdb_bh) return -EIO; - gdbackups = verify_reserved_gdb(sb, gdb_bh); + gdbackups = verify_reserved_gdb(sb, group, gdb_bh); if (gdbackups < 0) { err = gdbackups; goto exit_bh; @@ -897,7 +897,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, err = -EIO; goto exit_bh; } - if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { + gdbackups = verify_reserved_gdb(sb, group, primary[res]); + if (gdbackups < 0) { brelse(primary[res]); err = gdbackups; goto exit_bh; -- cgit v1.2.3 From 3fbea4b3683a5dfa86489ef7799cbe55e8003dfa Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 Jan 2012 23:44:38 -0500 Subject: ext4: add a new function which allocates bitmaps and inode tables This patch adds a new function named ext4_allocates_group_table() which allocates block bitmaps, inode bitmaps and inode tables for a flex groups and is used by resize code. 
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 12eace09654..a4075de73c7 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -189,6 +189,117 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) kfree(flex_gd); } +/* + * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps + * and inode tables for a flex group. + * + * This function is used by 64bit-resize. Note that this function allocates + * group tables from the 1st group of groups contained by @flexgd, which may + * be a partial of a flex group. + * + * @sb: super block of fs to which the groups belongs + */ +static void ext4_alloc_group_tables(struct super_block *sb, + struct ext4_new_flex_group_data *flex_gd, + int flexbg_size) +{ + struct ext4_new_group_data *group_data = flex_gd->groups; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + ext4_fsblk_t start_blk; + ext4_fsblk_t last_blk; + ext4_group_t src_group; + ext4_group_t bb_index = 0; + ext4_group_t ib_index = 0; + ext4_group_t it_index = 0; + ext4_group_t group; + ext4_group_t last_group; + unsigned overhead; + + BUG_ON(flex_gd->count == 0 || group_data == NULL); + + src_group = group_data[0].group; + last_group = src_group + flex_gd->count - 1; + + BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) != + (last_group & ~(flexbg_size - 1)))); +next_group: + group = group_data[0].group; + start_blk = ext4_group_first_block_no(sb, src_group); + last_blk = start_blk + group_data[src_group - group].blocks_count; + + overhead = ext4_bg_has_super(sb, src_group) ? + (1 + ext4_bg_num_gdb(sb, src_group) + + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + + start_blk += overhead; + + BUG_ON(src_group >= group_data[0].group + flex_gd->count); + /* We collect contiguous blocks as much as possible. 
*/ + src_group++; + for (; src_group <= last_group; src_group++) + if (!ext4_bg_has_super(sb, src_group)) + last_blk += group_data[src_group - group].blocks_count; + else + break; + + /* Allocate block bitmaps */ + for (; bb_index < flex_gd->count; bb_index++) { + if (start_blk >= last_blk) + goto next_group; + group_data[bb_index].block_bitmap = start_blk++; + ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL); + group -= group_data[0].group; + group_data[group].free_blocks_count--; + if (flexbg_size > 1) + flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + } + + /* Allocate inode bitmaps */ + for (; ib_index < flex_gd->count; ib_index++) { + if (start_blk >= last_blk) + goto next_group; + group_data[ib_index].inode_bitmap = start_blk++; + ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL); + group -= group_data[0].group; + group_data[group].free_blocks_count--; + if (flexbg_size > 1) + flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + } + + /* Allocate inode tables */ + for (; it_index < flex_gd->count; it_index++) { + if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk) + goto next_group; + group_data[it_index].inode_table = start_blk; + ext4_get_group_no_and_offset(sb, start_blk, &group, NULL); + group -= group_data[0].group; + group_data[group].free_blocks_count -= + EXT4_SB(sb)->s_itb_per_group; + if (flexbg_size > 1) + flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + + start_blk += EXT4_SB(sb)->s_itb_per_group; + } + + if (test_opt(sb, DEBUG)) { + int i; + group = group_data[0].group; + + printk(KERN_DEBUG "EXT4-fs: adding a flex group with " + "%d groups, flexbg size is %d:\n", flex_gd->count, + flexbg_size); + + for (i = 0; i < flex_gd->count; i++) { + printk(KERN_DEBUG "adding %s group %u: %u " + "blocks (%d free)\n", + ext4_bg_has_super(sb, group + i) ? "normal" : + "no-super", group + i, + group_data[i].blocks_count, + group_data[i].free_blocks_count); + } + } +} + static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, ext4_fsblk_t blk) { -- cgit v1.2.3 From 4bac1f8cef7bfd2c62793f75aba66a5b8357dede Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 Jan 2012 23:44:38 -0500 Subject: ext4: add a new function which adds a flex group to a fs This patch adds a new function named ext4_flex_group_add() which adds a flex group to a fs. The function is used by 64bit-resize interface. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index a4075de73c7..dac23561f3e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1348,6 +1348,88 @@ static void ext4_update_super(struct super_block *sb, blocks_count, free_blocks, reserved_blocks); } +/* Add a flex group to an fs. Ensure we handle all possible error conditions + * _before_ we start modifying the filesystem, because we cannot abort the + * transaction and not have it write the data to disk. 
+ */ +static int ext4_flex_group_add(struct super_block *sb, + struct inode *resize_inode, + struct ext4_new_flex_group_data *flex_gd) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + ext4_fsblk_t o_blocks_count; + ext4_grpblk_t last; + ext4_group_t group; + handle_t *handle; + unsigned reserved_gdb; + int err = 0, err2 = 0, credit; + + BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags); + + reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); + o_blocks_count = ext4_blocks_count(es); + ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); + BUG_ON(last); + + err = setup_new_flex_group_blocks(sb, flex_gd); + if (err) + goto exit; + /* + * We will always be modifying at least the superblock and GDT + * block. If we are adding a group past the last current GDT block, + * we will also modify the inode and the dindirect block. If we + * are adding a group with superblock/GDT backups we will also + * modify each of the reserved GDT dindirect blocks. + */ + credit = flex_gd->count * 4 + reserved_gdb; + handle = ext4_journal_start_sb(sb, credit); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto exit; + } + + err = ext4_journal_get_write_access(handle, sbi->s_sbh); + if (err) + goto exit_journal; + + group = flex_gd->groups[0].group; + BUG_ON(group != EXT4_SB(sb)->s_groups_count); + err = ext4_add_new_descs(handle, sb, group, + resize_inode, flex_gd->count); + if (err) + goto exit_journal; + + err = ext4_setup_new_descs(handle, sb, flex_gd); + if (err) + goto exit_journal; + + ext4_update_super(sb, flex_gd); + + err = ext4_handle_dirty_super(handle, sb); + +exit_journal: + err2 = ext4_journal_stop(handle); + if (!err) + err = err2; + + if (!err) { + int i; + update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, + sizeof(struct ext4_super_block)); + for (i = 0; i < flex_gd->count; i++, group++) { + struct buffer_head *gdb_bh; + int gdb_num; + gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb); + gdb_bh = sbi->s_group_desc[gdb_num]; + update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, + gdb_bh->b_size); + } + } +exit: + return err; +} + /* Add group descriptor data to an existing or new group descriptor block. * Ensure we handle all possible error conditions _before_ we start modifying * the filesystem, because we cannot abort the transaction and not have it -- cgit v1.2.3 From f95a34c66554235b70a681fcd9feebc195f7ec0e Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 14 Oct 2011 12:34:58 -0500 Subject: dlm: move recovery barrier calls Put all the calls to recovery barriers in the same function to clarify where they each happen. Should not change any behavior. Also modify some recovery debug lines to make them consistent. 
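A condensed illustration of where the barriers end up after this change (sketch only; the full ls_recover() in the diff below interleaves more recovery steps, timing and logging): each step now sets its recovery status and waits for the other nodes right next to it, instead of hiding the status update inside the step itself.

static int ls_recover_barrier_sketch(struct dlm_ls *ls,
				     struct dlm_recover *rv, int *neg)
{
	int error;

	/* membership change, then the NODES barrier beside it */
	error = dlm_recover_members(ls, rv, neg);
	if (error)
		return error;
	dlm_set_recover_status(ls, DLM_RS_NODES);
	error = dlm_recover_members_wait(ls);
	if (error)
		return error;

	/* directory rebuild, then the DIR barrier beside it */
	error = dlm_recover_directory(ls);
	if (error)
		return error;
	dlm_set_recover_status(ls, DLM_RS_DIR);
	return dlm_recover_directory_wait(ls);
}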
Signed-off-by: David Teigland --- fs/dlm/dir.c | 1 - fs/dlm/member.c | 7 +------ fs/dlm/recover.c | 2 -- fs/dlm/recoverd.c | 45 +++++++++++++++++++++++++++------------------ 4 files changed, 28 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 7b84c1dbc82..83641574b01 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -290,7 +290,6 @@ int dlm_recover_directory(struct dlm_ls *ls) out_status: error = 0; - dlm_set_recover_status(ls, DLM_RS_DIR); log_debug(ls, "dlm_recover_directory %d entries", count); out_free: kfree(last_name); diff --git a/fs/dlm/member.c b/fs/dlm/member.c index b12532e553f..5ebd1df6967 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -251,7 +251,6 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) ls->ls_low_nodeid = low; make_member_array(ls); - dlm_set_recover_status(ls, DLM_RS_NODES); *neg_out = neg; error = ping_members(ls); @@ -261,12 +260,8 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) ls->ls_members_result = error; complete(&ls->ls_members_done); } - if (error) - goto out; - error = dlm_recover_members_wait(ls); - out: - log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error); + log_debug(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes); return error; } diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 50467cefdbd..81b23930449 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -542,8 +542,6 @@ int dlm_recover_locks(struct dlm_ls *ls) out: if (error) recover_list_clear(ls); - else - dlm_set_recover_status(ls, DLM_RS_LOCKS); return error; } diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 774da3cf92c..5a9e1a49a86 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -54,7 +54,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) unsigned long start; int error, neg = 0; - log_debug(ls, "recover %llx", (unsigned long long)rv->seq); + log_debug(ls, "dlm_recover %llx", (unsigned long long)rv->seq); mutex_lock(&ls->ls_recoverd_active); @@ -76,14 +76,22 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) /* * Add or remove nodes from the lockspace's ls_nodes list. - * Also waits for all nodes to complete dlm_recover_members. */ error = dlm_recover_members(ls, rv, &neg); if (error) { - log_debug(ls, "recover_members failed %d", error); + log_debug(ls, "dlm_recover_members error %d", error); goto fail; } + + dlm_set_recover_status(ls, DLM_RS_NODES); + + error = dlm_recover_members_wait(ls); + if (error) { + log_debug(ls, "dlm_recover_members_wait error %d", error); + goto fail; + } + start = jiffies; /* @@ -93,17 +101,15 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = dlm_recover_directory(ls); if (error) { - log_debug(ls, "recover_directory failed %d", error); + log_debug(ls, "dlm_recover_directory error %d", error); goto fail; } - /* - * Wait for all nodes to complete directory rebuild. 
- */ + dlm_set_recover_status(ls, DLM_RS_DIR); error = dlm_recover_directory_wait(ls); if (error) { - log_debug(ls, "recover_directory_wait failed %d", error); + log_debug(ls, "dlm_recover_directory_wait error %d", error); goto fail; } @@ -133,7 +139,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = dlm_recover_masters(ls); if (error) { - log_debug(ls, "recover_masters failed %d", error); + log_debug(ls, "dlm_recover_masters error %d", error); goto fail; } @@ -143,13 +149,15 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = dlm_recover_locks(ls); if (error) { - log_debug(ls, "recover_locks failed %d", error); + log_debug(ls, "dlm_recover_locks error %d", error); goto fail; } + dlm_set_recover_status(ls, DLM_RS_LOCKS); + error = dlm_recover_locks_wait(ls); if (error) { - log_debug(ls, "recover_locks_wait failed %d", error); + log_debug(ls, "dlm_recover_locks_wait error %d", error); goto fail; } @@ -170,7 +178,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = dlm_recover_locks_wait(ls); if (error) { - log_debug(ls, "recover_locks_wait failed %d", error); + log_debug(ls, "dlm_recover_locks_wait error %d", error); goto fail; } } @@ -186,9 +194,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_purge_requestqueue(ls); dlm_set_recover_status(ls, DLM_RS_DONE); + error = dlm_recover_done_wait(ls); if (error) { - log_debug(ls, "recover_done_wait failed %d", error); + log_debug(ls, "dlm_recover_done_wait error %d", error); goto fail; } @@ -200,25 +209,25 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) error = enable_locking(ls, rv->seq); if (error) { - log_debug(ls, "enable_locking failed %d", error); + log_debug(ls, "enable_locking error %d", error); goto fail; } error = dlm_process_requestqueue(ls); if (error) { - log_debug(ls, "process_requestqueue failed %d", error); + log_debug(ls, "dlm_process_requestqueue error %d", error); goto fail; } error = dlm_recover_waiters_post(ls); if (error) { - log_debug(ls, "recover_waiters_post failed %d", error); + log_debug(ls, "dlm_recover_waiters_post error %d", error); goto fail; } dlm_grant_after_purge(ls); - log_debug(ls, "recover %llx done: %u ms", + log_debug(ls, "dlm_recover %llx done: %u ms", (unsigned long long)rv->seq, jiffies_to_msecs(jiffies - start)); mutex_unlock(&ls->ls_recoverd_active); @@ -227,7 +236,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) fail: dlm_release_root_list(ls); - log_debug(ls, "recover %llx error %d", + log_debug(ls, "dlm_recover %llx error %d", (unsigned long long)rv->seq, error); mutex_unlock(&ls->ls_recoverd_active); return error; -- cgit v1.2.3 From 757a42719635495779462514458bbfbf12a37dac Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 20 Oct 2011 13:26:28 -0500 Subject: dlm: add node slots and generation Slot numbers are assigned to nodes when they join the lockspace. The slot number chosen is the minimum unused value starting at 1. Once a node is assigned a slot, that slot number will not change while the node remains a lockspace member. If the node leaves and rejoins it can be assigned a new slot number. A new generation number is also added to a lockspace. It is set and incremented during each recovery along with the slot collection/assignment. The slot numbers will be passed to gfs2 which will use them as journal id's. 
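The assignment rule itself is just "lowest unused slot, starting at 1". A minimal stand-alone sketch of that rule (illustration with hypothetical names; the in-kernel dlm_slots_assign() in the diff below also tracks generations, previously held slots and the sparse ls_slots array):

#include <stdio.h>

/* used[i] != 0 means slot i + 1 is already held by a lockspace member */
static int pick_lowest_free_slot(const int *used, int nslots)
{
	int i;

	for (i = 0; i < nslots; i++)
		if (!used[i])
			return i + 1;
	return nslots + 1;	/* all tracked slots taken; hand out a new one */
}

int main(void)
{
	int used[4] = { 1, 0, 1, 0 };	/* slots 1 and 3 taken */

	printf("next slot: %d\n", pick_lowest_free_slot(used, 4));	/* prints 2 */
	return 0;
}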
Signed-off-by: David Teigland --- fs/dlm/dlm_internal.h | 48 ++++++++- fs/dlm/lockspace.c | 5 + fs/dlm/member.c | 284 +++++++++++++++++++++++++++++++++++++++++++++++++- fs/dlm/member.h | 7 ++ fs/dlm/rcom.c | 99 +++++++++++++++--- fs/dlm/rcom.h | 2 +- fs/dlm/recover.c | 64 ++++++++++-- 7 files changed, 480 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 5685a9a5dba..f4d132c7690 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -117,6 +117,18 @@ struct dlm_member { struct list_head list; int nodeid; int weight; + int slot; + int slot_prev; + uint32_t generation; +}; + +/* + * low nodeid saves array of these in ls_slots + */ + +struct dlm_slot { + int nodeid; + int slot; }; /* @@ -337,7 +349,9 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag) /* dlm_header is first element of all structs sent between nodes */ #define DLM_HEADER_MAJOR 0x00030000 -#define DLM_HEADER_MINOR 0x00000000 +#define DLM_HEADER_MINOR 0x00000001 + +#define DLM_HEADER_SLOTS 0x00000001 #define DLM_MSG 1 #define DLM_RCOM 2 @@ -425,10 +439,34 @@ union dlm_packet { struct dlm_rcom rcom; }; +#define DLM_RSF_NEED_SLOTS 0x00000001 + +/* RCOM_STATUS data */ +struct rcom_status { + __le32 rs_flags; + __le32 rs_unused1; + __le64 rs_unused2; +}; + +/* RCOM_STATUS_REPLY data */ struct rcom_config { __le32 rf_lvblen; __le32 rf_lsflags; - __le64 rf_unused; + + /* DLM_HEADER_SLOTS adds: */ + __le32 rf_flags; + __le16 rf_our_slot; + __le16 rf_num_slots; + __le32 rf_generation; + __le32 rf_unused1; + __le64 rf_unused2; +}; + +struct rcom_slot { + __le32 ro_nodeid; + __le16 ro_slot; + __le16 ro_unused1; + __le64 ro_unused2; }; struct rcom_lock { @@ -455,6 +493,7 @@ struct dlm_ls { struct list_head ls_list; /* list of lockspaces */ dlm_lockspace_t *ls_local_handle; uint32_t ls_global_id; /* global unique lockspace ID */ + uint32_t ls_generation; uint32_t ls_exflags; int ls_lvblen; int ls_count; /* refcount of processes in @@ -493,6 +532,11 @@ struct dlm_ls { int ls_total_weight; int *ls_node_array; + int ls_slot; + int ls_num_slots; + int ls_slots_size; + struct dlm_slot *ls_slots; + struct dlm_rsb ls_stub_rsb; /* for returning errors */ struct dlm_lkb ls_stub_lkb; /* for returning errors */ struct dlm_message ls_stub_ms; /* for faking a reply */ diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 1d16a23b0a0..1441f04bfab 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -525,6 +525,11 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, if (!ls->ls_recover_buf) goto out_dirfree; + ls->ls_slot = 0; + ls->ls_num_slots = 0; + ls->ls_slots_size = 0; + ls->ls_slots = NULL; + INIT_LIST_HEAD(&ls->ls_recover_list); spin_lock_init(&ls->ls_recover_list_lock); ls->ls_recover_list_count = 0; diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 5ebd1df6967..eebc52aae82 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -19,6 +19,280 @@ #include "config.h" #include "lowcomms.h" +int dlm_slots_version(struct dlm_header *h) +{ + if ((h->h_version & 0x0000FFFF) < DLM_HEADER_SLOTS) + return 0; + return 1; +} + +void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc, + struct dlm_member *memb) +{ + struct rcom_config *rf = (struct rcom_config *)rc->rc_buf; + + if (!dlm_slots_version(&rc->rc_header)) + return; + + memb->slot = le16_to_cpu(rf->rf_our_slot); + memb->generation = le32_to_cpu(rf->rf_generation); +} + +void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc) +{ + struct dlm_slot *slot; + struct 
rcom_slot *ro; + int i; + + ro = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config)); + + /* ls_slots array is sparse, but not rcom_slots */ + + for (i = 0; i < ls->ls_slots_size; i++) { + slot = &ls->ls_slots[i]; + if (!slot->nodeid) + continue; + ro->ro_nodeid = cpu_to_le32(slot->nodeid); + ro->ro_slot = cpu_to_le16(slot->slot); + ro++; + } +} + +#define SLOT_DEBUG_LINE 128 + +static void log_debug_slots(struct dlm_ls *ls, uint32_t gen, int num_slots, + struct rcom_slot *ro0, struct dlm_slot *array, + int array_size) +{ + char line[SLOT_DEBUG_LINE]; + int len = SLOT_DEBUG_LINE - 1; + int pos = 0; + int ret, i; + + if (!dlm_config.ci_log_debug) + return; + + memset(line, 0, sizeof(line)); + + if (array) { + for (i = 0; i < array_size; i++) { + if (!array[i].nodeid) + continue; + + ret = snprintf(line + pos, len - pos, " %d:%d", + array[i].slot, array[i].nodeid); + if (ret >= len - pos) + break; + pos += ret; + } + } else if (ro0) { + for (i = 0; i < num_slots; i++) { + ret = snprintf(line + pos, len - pos, " %d:%d", + ro0[i].ro_slot, ro0[i].ro_nodeid); + if (ret >= len - pos) + break; + pos += ret; + } + } + + log_debug(ls, "generation %u slots %d%s", gen, num_slots, line); +} + +int dlm_slots_copy_in(struct dlm_ls *ls) +{ + struct dlm_member *memb; + struct dlm_rcom *rc = ls->ls_recover_buf; + struct rcom_config *rf = (struct rcom_config *)rc->rc_buf; + struct rcom_slot *ro0, *ro; + int our_nodeid = dlm_our_nodeid(); + int i, num_slots; + uint32_t gen; + + if (!dlm_slots_version(&rc->rc_header)) + return -1; + + gen = le32_to_cpu(rf->rf_generation); + if (gen <= ls->ls_generation) { + log_error(ls, "dlm_slots_copy_in gen %u old %u", + gen, ls->ls_generation); + } + ls->ls_generation = gen; + + num_slots = le16_to_cpu(rf->rf_num_slots); + if (!num_slots) + return -1; + + ro0 = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config)); + + for (i = 0, ro = ro0; i < num_slots; i++, ro++) { + ro->ro_nodeid = le32_to_cpu(ro->ro_nodeid); + ro->ro_slot = le16_to_cpu(ro->ro_slot); + } + + log_debug_slots(ls, gen, num_slots, ro0, NULL, 0); + + list_for_each_entry(memb, &ls->ls_nodes, list) { + for (i = 0, ro = ro0; i < num_slots; i++, ro++) { + if (ro->ro_nodeid != memb->nodeid) + continue; + memb->slot = ro->ro_slot; + memb->slot_prev = memb->slot; + break; + } + + if (memb->nodeid == our_nodeid) { + if (ls->ls_slot && ls->ls_slot != memb->slot) { + log_error(ls, "dlm_slots_copy_in our slot " + "changed %d %d", ls->ls_slot, + memb->slot); + return -1; + } + + if (!ls->ls_slot) + ls->ls_slot = memb->slot; + } + + if (!memb->slot) { + log_error(ls, "dlm_slots_copy_in nodeid %d no slot", + memb->nodeid); + return -1; + } + } + + return 0; +} + +/* for any nodes that do not support slots, we will not have set memb->slot + in wait_status_all(), so memb->slot will remain -1, and we will not + assign slots or set ls_num_slots here */ + +int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, + struct dlm_slot **slots_out, uint32_t *gen_out) +{ + struct dlm_member *memb; + struct dlm_slot *array; + int our_nodeid = dlm_our_nodeid(); + int array_size, max_slots, i; + int need = 0; + int max = 0; + int num = 0; + uint32_t gen = 0; + + /* our own memb struct will have slot -1 gen 0 */ + + list_for_each_entry(memb, &ls->ls_nodes, list) { + if (memb->nodeid == our_nodeid) { + memb->slot = ls->ls_slot; + memb->generation = ls->ls_generation; + break; + } + } + + list_for_each_entry(memb, &ls->ls_nodes, list) { + if (memb->generation > gen) + gen = memb->generation; + + /* node 
doesn't support slots */ + + if (memb->slot == -1) + return -1; + + /* node needs a slot assigned */ + + if (!memb->slot) + need++; + + /* node has a slot assigned */ + + num++; + + if (!max || max < memb->slot) + max = memb->slot; + + /* sanity check, once slot is assigned it shouldn't change */ + + if (memb->slot_prev && memb->slot && memb->slot_prev != memb->slot) { + log_error(ls, "nodeid %d slot changed %d %d", + memb->nodeid, memb->slot_prev, memb->slot); + return -1; + } + memb->slot_prev = memb->slot; + } + + array_size = max + need; + + array = kzalloc(array_size * sizeof(struct dlm_slot), GFP_NOFS); + if (!array) + return -ENOMEM; + + num = 0; + + /* fill in slots (offsets) that are used */ + + list_for_each_entry(memb, &ls->ls_nodes, list) { + if (!memb->slot) + continue; + + if (memb->slot > array_size) { + log_error(ls, "invalid slot number %d", memb->slot); + kfree(array); + return -1; + } + + array[memb->slot - 1].nodeid = memb->nodeid; + array[memb->slot - 1].slot = memb->slot; + num++; + } + + /* assign new slots from unused offsets */ + + list_for_each_entry(memb, &ls->ls_nodes, list) { + if (memb->slot) + continue; + + for (i = 0; i < array_size; i++) { + if (array[i].nodeid) + continue; + + memb->slot = i + 1; + memb->slot_prev = memb->slot; + array[i].nodeid = memb->nodeid; + array[i].slot = memb->slot; + num++; + + if (!ls->ls_slot && memb->nodeid == our_nodeid) + ls->ls_slot = memb->slot; + break; + } + + if (!memb->slot) { + log_error(ls, "no free slot found"); + kfree(array); + return -1; + } + } + + gen++; + + log_debug_slots(ls, gen, num, NULL, array, array_size); + + max_slots = (dlm_config.ci_buffer_size - sizeof(struct dlm_rcom) - + sizeof(struct rcom_config)) / sizeof(struct rcom_slot); + + if (num > max_slots) { + log_error(ls, "num_slots %d exceeds max_slots %d", + num, max_slots); + kfree(array); + return -1; + } + + *gen_out = gen; + *slots_out = array; + *slots_size = array_size; + *num_slots = num; + return 0; +} + static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) { struct dlm_member *memb = NULL; @@ -176,7 +450,7 @@ static int ping_members(struct dlm_ls *ls) error = dlm_recovery_stopped(ls); if (error) break; - error = dlm_rcom_status(ls, memb->nodeid); + error = dlm_rcom_status(ls, memb->nodeid, 0); if (error) break; } @@ -322,7 +596,15 @@ int dlm_ls_stop(struct dlm_ls *ls) */ dlm_recoverd_suspend(ls); + + spin_lock(&ls->ls_recover_lock); + kfree(ls->ls_slots); + ls->ls_slots = NULL; + ls->ls_num_slots = 0; + ls->ls_slots_size = 0; ls->ls_recover_status = 0; + spin_unlock(&ls->ls_recover_lock); + dlm_recoverd_resume(ls); if (!ls->ls_recover_begin) diff --git a/fs/dlm/member.h b/fs/dlm/member.h index 7a26fca1e0b..7e87e8a79df 100644 --- a/fs/dlm/member.h +++ b/fs/dlm/member.h @@ -20,6 +20,13 @@ void dlm_clear_members_gone(struct dlm_ls *ls); int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out); int dlm_is_removed(struct dlm_ls *ls, int nodeid); int dlm_is_member(struct dlm_ls *ls, int nodeid); +int dlm_slots_version(struct dlm_header *h); +void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc, + struct dlm_member *memb); +void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc); +int dlm_slots_copy_in(struct dlm_ls *ls); +int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, + struct dlm_slot **slots_out, uint32_t *gen_out); #endif /* __MEMBER_DOT_H__ */ diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index f10a50f24e8..ac5c616c969 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c 
@@ -23,6 +23,7 @@ #include "memory.h" #include "lock.h" #include "util.h" +#include "member.h" static int rcom_response(struct dlm_ls *ls) @@ -72,20 +73,30 @@ static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh, dlm_lowcomms_commit_buffer(mh); } +static void set_rcom_status(struct dlm_ls *ls, struct rcom_status *rs, + uint32_t flags) +{ + rs->rs_flags = cpu_to_le32(flags); +} + /* When replying to a status request, a node also sends back its configuration values. The requesting node then checks that the remote node is configured the same way as itself. */ -static void make_config(struct dlm_ls *ls, struct rcom_config *rf) +static void set_rcom_config(struct dlm_ls *ls, struct rcom_config *rf, + uint32_t num_slots) { rf->rf_lvblen = cpu_to_le32(ls->ls_lvblen); rf->rf_lsflags = cpu_to_le32(ls->ls_exflags); + + rf->rf_our_slot = cpu_to_le16(ls->ls_slot); + rf->rf_num_slots = cpu_to_le16(num_slots); + rf->rf_generation = cpu_to_le32(ls->ls_generation); } -static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) +static int check_rcom_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) { struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; - size_t conf_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_config); if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { log_error(ls, "version mismatch: %x nodeid %d: %x", @@ -94,12 +105,6 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) return -EPROTO; } - if (rc->rc_header.h_length < conf_size) { - log_error(ls, "config too short: %d nodeid %d", - rc->rc_header.h_length, nodeid); - return -EPROTO; - } - if (le32_to_cpu(rf->rf_lvblen) != ls->ls_lvblen || le32_to_cpu(rf->rf_lsflags) != ls->ls_exflags) { log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", @@ -127,7 +132,18 @@ static void disallow_sync_reply(struct dlm_ls *ls) spin_unlock(&ls->ls_rcom_spin); } -int dlm_rcom_status(struct dlm_ls *ls, int nodeid) +/* + * low nodeid gathers one slot value at a time from each node. + * it sets need_slots=0, and saves rf_our_slot returned from each + * rcom_config. + * + * other nodes gather all slot values at once from the low nodeid. + * they set need_slots=1, and ignore the rf_our_slot returned from each + * rcom_config. they use the rf_num_slots returned from the low + * node's rcom_config. 
+ */ + +int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) { struct dlm_rcom *rc; struct dlm_mhandle *mh; @@ -141,10 +157,13 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) goto out; } - error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh); + error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, + sizeof(struct rcom_status), &rc, &mh); if (error) goto out; + set_rcom_status(ls, (struct rcom_status *)rc->rc_buf, status_flags); + allow_sync_reply(ls, &rc->rc_id); memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); @@ -161,8 +180,11 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) /* we pretend the remote lockspace exists with 0 status */ log_debug(ls, "remote node %d not ready", nodeid); rc->rc_result = 0; - } else - error = check_config(ls, rc, nodeid); + error = 0; + } else { + error = check_rcom_config(ls, rc, nodeid); + } + /* the caller looks at rc_result for the remote recovery status */ out: return error; @@ -172,17 +194,60 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) { struct dlm_rcom *rc; struct dlm_mhandle *mh; - int error, nodeid = rc_in->rc_header.h_nodeid; + struct rcom_status *rs; + uint32_t status; + int nodeid = rc_in->rc_header.h_nodeid; + int len = sizeof(struct rcom_config); + int num_slots = 0; + int error; + + if (!dlm_slots_version(&rc_in->rc_header)) { + status = dlm_recover_status(ls); + goto do_create; + } + + rs = (struct rcom_status *)rc_in->rc_buf; + if (!(rs->rs_flags & DLM_RSF_NEED_SLOTS)) { + status = dlm_recover_status(ls); + goto do_create; + } + + spin_lock(&ls->ls_recover_lock); + status = ls->ls_recover_status; + num_slots = ls->ls_num_slots; + spin_unlock(&ls->ls_recover_lock); + len += num_slots * sizeof(struct rcom_slot); + + do_create: error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY, - sizeof(struct rcom_config), &rc, &mh); + len, &rc, &mh); if (error) return; + rc->rc_id = rc_in->rc_id; rc->rc_seq_reply = rc_in->rc_seq; - rc->rc_result = dlm_recover_status(ls); - make_config(ls, (struct rcom_config *) rc->rc_buf); + rc->rc_result = status; + + set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, num_slots); + + if (!num_slots) + goto do_send; + + spin_lock(&ls->ls_recover_lock); + if (ls->ls_num_slots != num_slots) { + spin_unlock(&ls->ls_recover_lock); + log_debug(ls, "receive_rcom_status num_slots %d to %d", + num_slots, ls->ls_num_slots); + rc->rc_result = 0; + set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, 0); + goto do_send; + } + + dlm_slots_copy_out(ls, rc); + spin_unlock(&ls->ls_recover_lock); + do_send: send_rcom(ls, mh, rc); } diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h index b09abd29ba3..206723ab744 100644 --- a/fs/dlm/rcom.h +++ b/fs/dlm/rcom.h @@ -14,7 +14,7 @@ #ifndef __RCOM_DOT_H__ #define __RCOM_DOT_H__ -int dlm_rcom_status(struct dlm_ls *ls, int nodeid); +int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags); int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 81b23930449..34d5adf1fce 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -85,14 +85,20 @@ uint32_t dlm_recover_status(struct dlm_ls *ls) return status; } +static void _set_recover_status(struct dlm_ls *ls, uint32_t status) +{ + ls->ls_recover_status |= status; +} + void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) { 
spin_lock(&ls->ls_recover_lock); - ls->ls_recover_status |= status; + _set_recover_status(ls, status); spin_unlock(&ls->ls_recover_lock); } -static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status) +static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, + int save_slots) { struct dlm_rcom *rc = ls->ls_recover_buf; struct dlm_member *memb; @@ -106,10 +112,13 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status) goto out; } - error = dlm_rcom_status(ls, memb->nodeid); + error = dlm_rcom_status(ls, memb->nodeid, 0); if (error) goto out; + if (save_slots) + dlm_slot_save(ls, rc, memb); + if (rc->rc_result & wait_status) break; if (delay < 1000) @@ -121,7 +130,8 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status) return error; } -static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status) +static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status, + uint32_t status_flags) { struct dlm_rcom *rc = ls->ls_recover_buf; int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; @@ -132,7 +142,7 @@ static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status) goto out; } - error = dlm_rcom_status(ls, nodeid); + error = dlm_rcom_status(ls, nodeid, status_flags); if (error) break; @@ -152,18 +162,56 @@ static int wait_status(struct dlm_ls *ls, uint32_t status) int error; if (ls->ls_low_nodeid == dlm_our_nodeid()) { - error = wait_status_all(ls, status); + error = wait_status_all(ls, status, 0); if (!error) dlm_set_recover_status(ls, status_all); } else - error = wait_status_low(ls, status_all); + error = wait_status_low(ls, status_all, 0); return error; } int dlm_recover_members_wait(struct dlm_ls *ls) { - return wait_status(ls, DLM_RS_NODES); + struct dlm_member *memb; + struct dlm_slot *slots; + int num_slots, slots_size; + int error, rv; + uint32_t gen; + + list_for_each_entry(memb, &ls->ls_nodes, list) { + memb->slot = -1; + memb->generation = 0; + } + + if (ls->ls_low_nodeid == dlm_our_nodeid()) { + error = wait_status_all(ls, DLM_RS_NODES, 1); + if (error) + goto out; + + /* slots array is sparse, slots_size may be > num_slots */ + + rv = dlm_slots_assign(ls, &num_slots, &slots_size, &slots, &gen); + if (!rv) { + spin_lock(&ls->ls_recover_lock); + _set_recover_status(ls, DLM_RS_NODES_ALL); + ls->ls_num_slots = num_slots; + ls->ls_slots_size = slots_size; + ls->ls_slots = slots; + ls->ls_generation = gen; + spin_unlock(&ls->ls_recover_lock); + } else { + dlm_set_recover_status(ls, DLM_RS_NODES_ALL); + } + } else { + error = wait_status_low(ls, DLM_RS_NODES_ALL, DLM_RSF_NEED_SLOTS); + if (error) + goto out; + + dlm_slots_copy_in(ls); + } + out: + return error; } int dlm_recover_directory_wait(struct dlm_ls *ls) -- cgit v1.2.3 From 60f98d1839376d30e13f3e452dce2433fad3060e Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 2 Nov 2011 14:30:58 -0500 Subject: dlm: add recovery callbacks These new callbacks notify the dlm user about lock recovery. GFS2, and possibly others, need to be aware of when the dlm will be doing lock recovery for a failed lockspace member. In the past, this coordination has been done between dlm and file system daemons in userspace, which then direct their kernel counterparts. These callbacks allow the same coordination directly, and more simply. 
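A hedged sketch of how a kernel-side lockspace user might wire up the new callbacks through the extended dlm_new_lockspace() added below (the callback names and bodies here are hypothetical placeholders, not code taken from gfs2):

static void example_recover_prep(void *arg)
{
	/* recovery is starting: stop trusting cached lock state */
}

static void example_recover_slot(void *arg, struct dlm_slot *slot)
{
	/* slot->nodeid failed; e.g. queue journal slot->slot for replay */
}

static void example_recover_done(void *arg, struct dlm_slot *slots,
				 int num_slots, int our_slot,
				 uint32_t generation)
{
	/* recovery for this generation finished; resume normal operation */
}

static const struct dlm_lockspace_ops example_ops = {
	.recover_prep = example_recover_prep,
	.recover_slot = example_recover_slot,
	.recover_done = example_recover_done,
};

static int example_join(void *arg, dlm_lockspace_t **ls)
{
	int ops_result;

	/* lvblen must be a multiple of 8; ops_result reports callback support */
	return dlm_new_lockspace("example", "mycluster", DLM_LSFL_FS, 64,
				 &example_ops, arg, &ops_result, ls);
}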
Signed-off-by: David Teigland --- fs/dlm/config.c | 130 ++++++++++++++++++--------------- fs/dlm/config.h | 17 ++++- fs/dlm/dlm_internal.h | 21 ++---- fs/dlm/lockspace.c | 43 +++++++++-- fs/dlm/member.c | 197 ++++++++++++++++++++++++++++++++------------------ fs/dlm/member.h | 3 +- fs/dlm/recoverd.c | 10 +-- fs/dlm/user.c | 5 +- fs/gfs2/lock_dlm.c | 4 +- fs/ocfs2/stack_user.c | 4 +- include/linux/dlm.h | 71 ++++++++++++++++-- 11 files changed, 333 insertions(+), 172 deletions(-) (limited to 'fs') diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 6cf72fcc0d0..e7e327d43fa 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -36,6 +37,7 @@ static struct config_group *space_list; static struct config_group *comm_list; static struct dlm_comm *local_comm; +static uint32_t dlm_comm_count; struct dlm_clusters; struct dlm_cluster; @@ -103,6 +105,8 @@ struct dlm_cluster { unsigned int cl_timewarn_cs; unsigned int cl_waitwarn_us; unsigned int cl_new_rsb_count; + unsigned int cl_recover_callbacks; + char cl_cluster_name[DLM_LOCKSPACE_LEN]; }; enum { @@ -118,6 +122,8 @@ enum { CLUSTER_ATTR_TIMEWARN_CS, CLUSTER_ATTR_WAITWARN_US, CLUSTER_ATTR_NEW_RSB_COUNT, + CLUSTER_ATTR_RECOVER_CALLBACKS, + CLUSTER_ATTR_CLUSTER_NAME, }; struct cluster_attribute { @@ -126,6 +132,27 @@ struct cluster_attribute { ssize_t (*store)(struct dlm_cluster *, const char *, size_t); }; +static ssize_t cluster_cluster_name_read(struct dlm_cluster *cl, char *buf) +{ + return sprintf(buf, "%s\n", cl->cl_cluster_name); +} + +static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl, + const char *buf, size_t len) +{ + strncpy(dlm_config.ci_cluster_name, buf, DLM_LOCKSPACE_LEN); + strncpy(cl->cl_cluster_name, buf, DLM_LOCKSPACE_LEN); + return len; +} + +static struct cluster_attribute cluster_attr_cluster_name = { + .attr = { .ca_owner = THIS_MODULE, + .ca_name = "cluster_name", + .ca_mode = S_IRUGO | S_IWUSR }, + .show = cluster_cluster_name_read, + .store = cluster_cluster_name_write, +}; + static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, int *info_field, int check_zero, const char *buf, size_t len) @@ -171,6 +198,7 @@ CLUSTER_ATTR(protocol, 0); CLUSTER_ATTR(timewarn_cs, 1); CLUSTER_ATTR(waitwarn_us, 0); CLUSTER_ATTR(new_rsb_count, 0); +CLUSTER_ATTR(recover_callbacks, 0); static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, @@ -185,6 +213,8 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, + [CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks.attr, + [CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name.attr, NULL, }; @@ -293,6 +323,7 @@ struct dlm_comms { struct dlm_comm { struct config_item item; + int seq; int nodeid; int local; int addr_count; @@ -309,6 +340,7 @@ struct dlm_node { int nodeid; int weight; int new; + int 
comm_seq; /* copy of cm->seq when nd->nodeid is set */ }; static struct configfs_group_operations clusters_ops = { @@ -455,6 +487,9 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count; + cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks; + memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name, + DLM_LOCKSPACE_LEN); space_list = &sps->ss_group; comm_list = &cms->cs_group; @@ -558,6 +593,11 @@ static struct config_item *make_comm(struct config_group *g, const char *name) return ERR_PTR(-ENOMEM); config_item_init_type_name(&cm->item, name, &comm_type); + + cm->seq = dlm_comm_count++; + if (!cm->seq) + cm->seq = dlm_comm_count++; + cm->nodeid = -1; cm->local = 0; cm->addr_count = 0; @@ -801,7 +841,10 @@ static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf) static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, size_t len) { + uint32_t seq = 0; nd->nodeid = simple_strtol(buf, NULL, 0); + dlm_comm_seq(nd->nodeid, &seq); + nd->comm_seq = seq; return len; } @@ -908,13 +951,13 @@ static void put_comm(struct dlm_comm *cm) } /* caller must free mem */ -int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, - int **new_out, int *new_count_out) +int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, + int *count_out) { struct dlm_space *sp; struct dlm_node *nd; - int i = 0, rv = 0, ids_count = 0, new_count = 0; - int *ids, *new; + struct dlm_config_node *nodes, *node; + int rv, count; sp = get_space(lsname); if (!sp) @@ -927,73 +970,42 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, goto out; } - ids_count = sp->members_count; + count = sp->members_count; - ids = kcalloc(ids_count, sizeof(int), GFP_NOFS); - if (!ids) { + nodes = kcalloc(count, sizeof(struct dlm_config_node), GFP_NOFS); + if (!nodes) { rv = -ENOMEM; goto out; } + node = nodes; list_for_each_entry(nd, &sp->members, list) { - ids[i++] = nd->nodeid; - if (nd->new) - new_count++; - } - - if (ids_count != i) - printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i); - - if (!new_count) - goto out_ids; + node->nodeid = nd->nodeid; + node->weight = nd->weight; + node->new = nd->new; + node->comm_seq = nd->comm_seq; + node++; - new = kcalloc(new_count, sizeof(int), GFP_NOFS); - if (!new) { - kfree(ids); - rv = -ENOMEM; - goto out; + nd->new = 0; } - i = 0; - list_for_each_entry(nd, &sp->members, list) { - if (nd->new) { - new[i++] = nd->nodeid; - nd->new = 0; - } - } - *new_count_out = new_count; - *new_out = new; - - out_ids: - *ids_count_out = ids_count; - *ids_out = ids; + *count_out = count; + *nodes_out = nodes; + rv = 0; out: mutex_unlock(&sp->members_lock); put_space(sp); return rv; } -int dlm_node_weight(char *lsname, int nodeid) +int dlm_comm_seq(int nodeid, uint32_t *seq) { - struct dlm_space *sp; - struct dlm_node *nd; - int w = -EEXIST; - - sp = get_space(lsname); - if (!sp) - goto out; - - mutex_lock(&sp->members_lock); - list_for_each_entry(nd, &sp->members, list) { - if (nd->nodeid != nodeid) - continue; - w = nd->weight; - break; - } - mutex_unlock(&sp->members_lock); - put_space(sp); - out: - return w; + struct dlm_comm *cm = get_comm(nodeid, NULL); + if (!cm) + return -EEXIST; + *seq = cm->seq; + put_comm(cm); + return 0; } int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) @@ -1047,6 +1059,8 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) 
#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ #define DEFAULT_WAITWARN_US 0 #define DEFAULT_NEW_RSB_COUNT 128 +#define DEFAULT_RECOVER_CALLBACKS 0 +#define DEFAULT_CLUSTER_NAME "" struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, @@ -1060,6 +1074,8 @@ struct dlm_config_info dlm_config = { .ci_protocol = DEFAULT_PROTOCOL, .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, .ci_waitwarn_us = DEFAULT_WAITWARN_US, - .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT + .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT, + .ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS, + .ci_cluster_name = DEFAULT_CLUSTER_NAME }; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 3099d0dd26c..9f5e3663bb0 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -14,6 +14,13 @@ #ifndef __CONFIG_DOT_H__ #define __CONFIG_DOT_H__ +struct dlm_config_node { + int nodeid; + int weight; + int new; + uint32_t comm_seq; +}; + #define DLM_MAX_ADDR_COUNT 3 struct dlm_config_info { @@ -29,15 +36,17 @@ struct dlm_config_info { int ci_timewarn_cs; int ci_waitwarn_us; int ci_new_rsb_count; + int ci_recover_callbacks; + char ci_cluster_name[DLM_LOCKSPACE_LEN]; }; extern struct dlm_config_info dlm_config; int dlm_config_init(void); void dlm_config_exit(void); -int dlm_node_weight(char *lsname, int nodeid); -int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, - int **new_out, int *new_count_out); +int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, + int *count_out); +int dlm_comm_seq(int nodeid, uint32_t *seq); int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); int dlm_our_nodeid(void); diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index f4d132c7690..3a564d197e9 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -119,28 +119,18 @@ struct dlm_member { int weight; int slot; int slot_prev; + int comm_seq; uint32_t generation; }; -/* - * low nodeid saves array of these in ls_slots - */ - -struct dlm_slot { - int nodeid; - int slot; -}; - /* * Save and manage recovery state for a lockspace. 
*/ struct dlm_recover { struct list_head list; - int *nodeids; /* nodeids of all members */ - int node_count; - int *new; /* nodeids of new members */ - int new_count; + struct dlm_config_node *nodes; + int nodes_count; uint64_t seq; }; @@ -584,6 +574,9 @@ struct dlm_ls { struct list_head ls_root_list; /* root resources */ struct rw_semaphore ls_root_sem; /* protect root_list */ + const struct dlm_lockspace_ops *ls_ops; + void *ls_ops_arg; + int ls_namelen; char ls_name[1]; }; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 1441f04bfab..a1ea25face8 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -386,12 +386,15 @@ static void threads_stop(void) dlm_lowcomms_stop(); } -static int new_lockspace(const char *name, int namelen, void **lockspace, - uint32_t flags, int lvblen) +static int new_lockspace(const char *name, const char *cluster, + uint32_t flags, int lvblen, + const struct dlm_lockspace_ops *ops, void *ops_arg, + int *ops_result, dlm_lockspace_t **lockspace) { struct dlm_ls *ls; int i, size, error; int do_unreg = 0; + int namelen = strlen(name); if (namelen > DLM_LOCKSPACE_LEN) return -EINVAL; @@ -403,8 +406,24 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, return -EINVAL; if (!dlm_user_daemon_available()) { - module_put(THIS_MODULE); - return -EUNATCH; + log_print("dlm user daemon not available"); + error = -EUNATCH; + goto out; + } + + if (ops && ops_result) { + if (!dlm_config.ci_recover_callbacks) + *ops_result = -EOPNOTSUPP; + else + *ops_result = 0; + } + + if (dlm_config.ci_recover_callbacks && cluster && + strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) { + log_print("dlm cluster name %s mismatch %s", + dlm_config.ci_cluster_name, cluster); + error = -EBADR; + goto out; } error = 0; @@ -442,6 +461,11 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, ls->ls_flags = 0; ls->ls_scan_time = jiffies; + if (ops && dlm_config.ci_recover_callbacks) { + ls->ls_ops = ops; + ls->ls_ops_arg = ops_arg; + } + if (flags & DLM_LSFL_TIMEWARN) set_bit(LSFL_TIMEWARN, &ls->ls_flags); @@ -619,8 +643,10 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, return error; } -int dlm_new_lockspace(const char *name, int namelen, void **lockspace, - uint32_t flags, int lvblen) +int dlm_new_lockspace(const char *name, const char *cluster, + uint32_t flags, int lvblen, + const struct dlm_lockspace_ops *ops, void *ops_arg, + int *ops_result, dlm_lockspace_t **lockspace) { int error = 0; @@ -630,7 +656,8 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace, if (error) goto out; - error = new_lockspace(name, namelen, lockspace, flags, lvblen); + error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg, + ops_result, lockspace); if (!error) ls_count++; if (error > 0) diff --git a/fs/dlm/member.c b/fs/dlm/member.c index eebc52aae82..862640a36d5 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -1,7 +1,7 @@ /****************************************************************************** 
******************************************************************************* ** -** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -27,7 +27,7 @@ int dlm_slots_version(struct dlm_header *h) } void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc, - struct dlm_member *memb) + struct dlm_member *memb) { struct rcom_config *rf = (struct rcom_config *)rc->rc_buf; @@ -317,59 +317,51 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) } } -static int dlm_add_member(struct dlm_ls *ls, int nodeid) +static int dlm_add_member(struct dlm_ls *ls, struct dlm_config_node *node) { struct dlm_member *memb; - int w, error; + int error; memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); if (!memb) return -ENOMEM; - w = dlm_node_weight(ls->ls_name, nodeid); - if (w < 0) { - kfree(memb); - return w; - } - - error = dlm_lowcomms_connect_node(nodeid); + error = dlm_lowcomms_connect_node(node->nodeid); if (error < 0) { kfree(memb); return error; } - memb->nodeid = nodeid; - memb->weight = w; + memb->nodeid = node->nodeid; + memb->weight = node->weight; + memb->comm_seq = node->comm_seq; add_ordered_member(ls, memb); ls->ls_num_nodes++; return 0; } -static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb) -{ - list_move(&memb->list, &ls->ls_nodes_gone); - ls->ls_num_nodes--; -} - -int dlm_is_member(struct dlm_ls *ls, int nodeid) +static struct dlm_member *find_memb(struct list_head *head, int nodeid) { struct dlm_member *memb; - list_for_each_entry(memb, &ls->ls_nodes, list) { + list_for_each_entry(memb, head, list) { if (memb->nodeid == nodeid) - return 1; + return memb; } + return NULL; +} + +int dlm_is_member(struct dlm_ls *ls, int nodeid) +{ + if (find_memb(&ls->ls_nodes, nodeid)) + return 1; return 0; } int dlm_is_removed(struct dlm_ls *ls, int nodeid) { - struct dlm_member *memb; - - list_for_each_entry(memb, &ls->ls_nodes_gone, list) { - if (memb->nodeid == nodeid) - return 1; - } + if (find_memb(&ls->ls_nodes_gone, nodeid)) + return 1; return 0; } @@ -460,10 +452,88 @@ static int ping_members(struct dlm_ls *ls) return error; } +static void dlm_lsop_recover_prep(struct dlm_ls *ls) +{ + if (!ls->ls_ops || !ls->ls_ops->recover_prep) + return; + ls->ls_ops->recover_prep(ls->ls_ops_arg); +} + +static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb) +{ + struct dlm_slot slot; + uint32_t seq; + int error; + + if (!ls->ls_ops || !ls->ls_ops->recover_slot) + return; + + /* if there is no comms connection with this node + or the present comms connection is newer + than the one when this member was added, then + we consider the node to have failed (versus + being removed due to dlm_release_lockspace) */ + + error = dlm_comm_seq(memb->nodeid, &seq); + + if (!error && seq == memb->comm_seq) + return; + + slot.nodeid = memb->nodeid; + slot.slot = memb->slot; + + ls->ls_ops->recover_slot(ls->ls_ops_arg, &slot); +} + +void dlm_lsop_recover_done(struct dlm_ls *ls) +{ + struct dlm_member *memb; + struct dlm_slot *slots; + int i, num; + + if (!ls->ls_ops || !ls->ls_ops->recover_done) + return; + + num = ls->ls_num_nodes; + + slots = kzalloc(num * sizeof(struct dlm_slot), GFP_KERNEL); + if (!slots) + return; + + i = 0; + list_for_each_entry(memb, &ls->ls_nodes, list) { + if (i == num) { + log_error(ls, "dlm_lsop_recover_done bad 
num %d", num); + goto out; + } + slots[i].nodeid = memb->nodeid; + slots[i].slot = memb->slot; + i++; + } + + ls->ls_ops->recover_done(ls->ls_ops_arg, slots, num, + ls->ls_slot, ls->ls_generation); + out: + kfree(slots); +} + +static struct dlm_config_node *find_config_node(struct dlm_recover *rv, + int nodeid) +{ + int i; + + for (i = 0; i < rv->nodes_count; i++) { + if (rv->nodes[i].nodeid == nodeid) + return &rv->nodes[i]; + } + return NULL; +} + int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) { struct dlm_member *memb, *safe; - int i, error, found, pos = 0, neg = 0, low = -1; + struct dlm_config_node *node; + int i, error, neg = 0, low = -1; /* previously removed members that we've not finished removing need to count as a negative change so the "neg" recovery steps will happen */ @@ -476,46 +546,32 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) /* move departed members from ls_nodes to ls_nodes_gone */ list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { - found = 0; - for (i = 0; i < rv->node_count; i++) { - if (memb->nodeid == rv->nodeids[i]) { - found = 1; - break; - } - } + node = find_config_node(rv, memb->nodeid); + if (node && !node->new) + continue; - if (!found) { - neg++; - dlm_remove_member(ls, memb); + if (!node) { log_debug(ls, "remove member %d", memb->nodeid); + } else { + /* removed and re-added */ + log_debug(ls, "remove member %d comm_seq %u %u", + memb->nodeid, memb->comm_seq, node->comm_seq); } - } - - /* Add an entry to ls_nodes_gone for members that were removed and - then added again, so that previous state for these nodes will be - cleared during recovery. */ - for (i = 0; i < rv->new_count; i++) { - if (!dlm_is_member(ls, rv->new[i])) - continue; - log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); - - memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); - if (!memb) - return -ENOMEM; - memb->nodeid = rv->new[i]; - list_add_tail(&memb->list, &ls->ls_nodes_gone); neg++; + list_move(&memb->list, &ls->ls_nodes_gone); + ls->ls_num_nodes--; + dlm_lsop_recover_slot(ls, memb); } /* add new members to ls_nodes */ - for (i = 0; i < rv->node_count; i++) { - if (dlm_is_member(ls, rv->nodeids[i])) + for (i = 0; i < rv->nodes_count; i++) { + node = &rv->nodes[i]; + if (dlm_is_member(ls, node->nodeid)) continue; - dlm_add_member(ls, rv->nodeids[i]); - pos++; - log_debug(ls, "add member %d", rv->nodeids[i]); + dlm_add_member(ls, node); + log_debug(ls, "add member %d", node->nodeid); } list_for_each_entry(memb, &ls->ls_nodes, list) { @@ -609,21 +665,22 @@ int dlm_ls_stop(struct dlm_ls *ls) if (!ls->ls_recover_begin) ls->ls_recover_begin = jiffies; + + dlm_lsop_recover_prep(ls); return 0; } int dlm_ls_start(struct dlm_ls *ls) { struct dlm_recover *rv = NULL, *rv_old; - int *ids = NULL, *new = NULL; - int error, ids_count = 0, new_count = 0; + struct dlm_config_node *nodes; + int error, count; rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS); if (!rv) return -ENOMEM; - error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, - &new, &new_count); + error = dlm_config_nodes(ls->ls_name, &nodes, &count); if (error < 0) goto fail; @@ -638,10 +695,8 @@ int dlm_ls_start(struct dlm_ls *ls) goto fail; } - rv->nodeids = ids; - rv->node_count = ids_count; - rv->new = new; - rv->new_count = new_count; + rv->nodes = nodes; + rv->nodes_count = count; rv->seq = ++ls->ls_recover_seq; rv_old = ls->ls_recover_args; ls->ls_recover_args = rv; @@ -649,9 +704,8 @@ int dlm_ls_start(struct dlm_ls *ls) if 
(rv_old) { log_error(ls, "unused recovery %llx %d", - (unsigned long long)rv_old->seq, rv_old->node_count); - kfree(rv_old->nodeids); - kfree(rv_old->new); + (unsigned long long)rv_old->seq, rv_old->nodes_count); + kfree(rv_old->nodes); kfree(rv_old); } @@ -660,8 +714,7 @@ int dlm_ls_start(struct dlm_ls *ls) fail: kfree(rv); - kfree(ids); - kfree(new); + kfree(nodes); return error; } diff --git a/fs/dlm/member.h b/fs/dlm/member.h index 7e87e8a79df..3deb70661c6 100644 --- a/fs/dlm/member.h +++ b/fs/dlm/member.h @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -27,6 +27,7 @@ void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc); int dlm_slots_copy_in(struct dlm_ls *ls); int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, struct dlm_slot **slots_out, uint32_t *gen_out); +void dlm_lsop_recover_done(struct dlm_ls *ls); #endif /* __MEMBER_DOT_H__ */ diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 5a9e1a49a86..3780caf7ae0 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -227,11 +227,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_grant_after_purge(ls); - log_debug(ls, "dlm_recover %llx done: %u ms", - (unsigned long long)rv->seq, + log_debug(ls, "dlm_recover %llx generation %u done: %u ms", + (unsigned long long)rv->seq, ls->ls_generation, jiffies_to_msecs(jiffies - start)); mutex_unlock(&ls->ls_recoverd_active); + dlm_lsop_recover_done(ls); return 0; fail: @@ -259,8 +260,7 @@ static void do_ls_recovery(struct dlm_ls *ls) if (rv) { ls_recover(ls, rv); - kfree(rv->nodeids); - kfree(rv->new); + kfree(rv->nodes); kfree(rv); } } diff --git a/fs/dlm/user.c b/fs/dlm/user.c index d8ea6075640..eb4ed9ba309 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -392,8 +392,9 @@ static int device_create_lockspace(struct dlm_lspace_params *params) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - error = dlm_new_lockspace(params->name, strlen(params->name), - &lockspace, params->flags, DLM_USER_LVB_LEN); + error = dlm_new_lockspace(params->name, NULL, params->flags, + DLM_USER_LVB_LEN, NULL, NULL, NULL, + &lockspace); if (error) return error; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 98c80d8c2a6..ce85b62bc0a 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -195,10 +195,10 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname) return -EINVAL; } - error = dlm_new_lockspace(fsname, strlen(fsname), &ls->ls_dlm, + error = dlm_new_lockspace(fsname, NULL, DLM_LSFL_FS | DLM_LSFL_NEWEXCL | (ls->ls_nodir ? 
DLM_LSFL_NODIR : 0), - GDLM_LVB_SIZE); + GDLM_LVB_SIZE, NULL, NULL, NULL, &ls->ls_dlm); if (error) printk(KERN_ERR "dlm_new_lockspace error %d", error); diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index a5ebe421195..286edf1e231 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -827,8 +827,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) goto out; } - rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name), - &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN); + rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN, + NULL, NULL, NULL, &fsdlm); if (rc) { ocfs2_live_connection_drop(control); goto out; diff --git a/include/linux/dlm.h b/include/linux/dlm.h index d4e02f5353a..6c7f6e9546c 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -74,15 +74,76 @@ struct dlm_lksb { #ifdef __KERNEL__ +struct dlm_slot { + int nodeid; /* 1 to MAX_INT */ + int slot; /* 1 to MAX_INT */ +}; + +/* + * recover_prep: called before the dlm begins lock recovery. + * Notfies lockspace user that locks from failed members will be granted. + * recover_slot: called after recover_prep and before recover_done. + * Identifies a failed lockspace member. + * recover_done: called after the dlm completes lock recovery. + * Identifies lockspace members and lockspace generation number. + */ + +struct dlm_lockspace_ops { + void (*recover_prep) (void *ops_arg); + void (*recover_slot) (void *ops_arg, struct dlm_slot *slot); + void (*recover_done) (void *ops_arg, struct dlm_slot *slots, + int num_slots, int our_slot, uint32_t generation); +}; + /* * dlm_new_lockspace * - * Starts a lockspace with the given name. If the named lockspace exists in - * the cluster, the calling node joins it. + * Create/join a lockspace. + * + * name: lockspace name, null terminated, up to DLM_LOCKSPACE_LEN (not + * including terminating null). + * + * cluster: cluster name, null terminated, up to DLM_LOCKSPACE_LEN (not + * including terminating null). Optional. When cluster is null, it + * is not used. When set, dlm_new_lockspace() returns -EBADR if cluster + * is not equal to the dlm cluster name. + * + * flags: + * DLM_LSFL_NODIR + * The dlm should not use a resource directory, but statically assign + * resource mastery to nodes based on the name hash that is otherwise + * used to select the directory node. Must be the same on all nodes. + * DLM_LSFL_TIMEWARN + * The dlm should emit netlink messages if locks have been waiting + * for a configurable amount of time. (Unused.) + * DLM_LSFL_FS + * The lockspace user is in the kernel (i.e. filesystem). Enables + * direct bast/cast callbacks. + * DLM_LSFL_NEWEXCL + * dlm_new_lockspace() should return -EEXIST if the lockspace exists. + * + * lvblen: length of lvb in bytes. Must be multiple of 8. + * dlm_new_lockspace() returns an error if this does not match + * what other nodes are using. + * + * ops: callbacks that indicate lockspace recovery points so the + * caller can coordinate its recovery and know lockspace members. + * This is only used by the initial dlm_new_lockspace() call. + * Optional. 
+ * + * ops_arg: arg for ops callbacks. + * + * ops_result: tells caller if the ops callbacks (if provided) will + * be used or not. 0: will be used, -EXXX will not be used. + * -EOPNOTSUPP: the dlm does not have recovery_callbacks enabled. + * + * lockspace: handle for dlm functions */ -int dlm_new_lockspace(const char *name, int namelen, - dlm_lockspace_t **lockspace, uint32_t flags, int lvblen); +int dlm_new_lockspace(const char *name, const char *cluster, + uint32_t flags, int lvblen, + const struct dlm_lockspace_ops *ops, void *ops_arg, + int *ops_result, dlm_lockspace_t **lockspace); /* * dlm_release_lockspace -- cgit v1.2.3 From 5b25f70f4200766355cdabda604e131d2fb6010d Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 10:55:48 +0200 Subject: Btrfs: add nested locking mode for paths This patch adds the possibilty to read-lock an extent even if it is already write-locked from the same thread. btrfs_find_all_roots() needs this capability. Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/extent_io.c | 1 + fs/btrfs/extent_io.h | 2 ++ fs/btrfs/locking.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index be1bf627a14..dd8d140eb27 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3571,6 +3571,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, atomic_set(&eb->blocking_writers, 0); atomic_set(&eb->spinning_readers, 0); atomic_set(&eb->spinning_writers, 0); + eb->lock_nested = 0; init_waitqueue_head(&eb->write_lock_wq); init_waitqueue_head(&eb->read_lock_wq); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 7604c300132..bc6a042cb6f 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -129,6 +129,7 @@ struct extent_buffer { struct list_head leak_list; struct rcu_head rcu_head; atomic_t refs; + pid_t lock_owner; /* count of read lock holders on the extent buffer */ atomic_t write_locks; @@ -137,6 +138,7 @@ struct extent_buffer { atomic_t blocking_readers; atomic_t spinning_readers; atomic_t spinning_writers; + int lock_nested; /* protects write locks */ rwlock_t lock; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index d77b67c4b27..5e178d8f716 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -33,6 +33,14 @@ void btrfs_assert_tree_read_locked(struct extent_buffer *eb); */ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) { + if (eb->lock_nested) { + read_lock(&eb->lock); + if (eb->lock_nested && current->pid == eb->lock_owner) { + read_unlock(&eb->lock); + return; + } + read_unlock(&eb->lock); + } if (rw == BTRFS_WRITE_LOCK) { if (atomic_read(&eb->blocking_writers) == 0) { WARN_ON(atomic_read(&eb->spinning_writers) != 1); @@ -57,6 +65,14 @@ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) */ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) { + if (eb->lock_nested) { + read_lock(&eb->lock); + if (&eb->lock_nested && current->pid == eb->lock_owner) { + read_unlock(&eb->lock); + return; + } + read_unlock(&eb->lock); + } if (rw == BTRFS_WRITE_LOCK_BLOCKING) { BUG_ON(atomic_read(&eb->blocking_writers) != 1); write_lock(&eb->lock); @@ -81,12 +97,25 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) void btrfs_tree_read_lock(struct extent_buffer *eb) { again: + read_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers) && + current->pid == eb->lock_owner) { + /* + * This 
extent is already write-locked by our thread. We allow + * an additional read lock to be added because it's for the same + * thread. btrfs_find_all_roots() depends on this as it may be + * called on a partly (write-)locked tree. + */ + BUG_ON(eb->lock_nested); + eb->lock_nested = 1; + read_unlock(&eb->lock); + return; + } + read_unlock(&eb->lock); wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); read_lock(&eb->lock); if (atomic_read(&eb->blocking_writers)) { read_unlock(&eb->lock); - wait_event(eb->write_lock_wq, - atomic_read(&eb->blocking_writers) == 0); goto again; } atomic_inc(&eb->read_locks); @@ -129,6 +158,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) } atomic_inc(&eb->write_locks); atomic_inc(&eb->spinning_writers); + eb->lock_owner = current->pid; return 1; } @@ -137,6 +167,15 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) */ void btrfs_tree_read_unlock(struct extent_buffer *eb) { + if (eb->lock_nested) { + read_lock(&eb->lock); + if (eb->lock_nested && current->pid == eb->lock_owner) { + eb->lock_nested = 0; + read_unlock(&eb->lock); + return; + } + read_unlock(&eb->lock); + } btrfs_assert_tree_read_locked(eb); WARN_ON(atomic_read(&eb->spinning_readers) == 0); atomic_dec(&eb->spinning_readers); @@ -149,6 +188,15 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb) */ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) { + if (eb->lock_nested) { + read_lock(&eb->lock); + if (eb->lock_nested && current->pid == eb->lock_owner) { + eb->lock_nested = 0; + read_unlock(&eb->lock); + return; + } + read_unlock(&eb->lock); + } btrfs_assert_tree_read_locked(eb); WARN_ON(atomic_read(&eb->blocking_readers) == 0); if (atomic_dec_and_test(&eb->blocking_readers)) @@ -181,6 +229,7 @@ again: WARN_ON(atomic_read(&eb->spinning_writers)); atomic_inc(&eb->spinning_writers); atomic_inc(&eb->write_locks); + eb->lock_owner = current->pid; return 0; } -- cgit v1.2.3 From 00f04b88791ff49dc64ada18819d40a5b0671709 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Wed, 14 Sep 2011 12:37:00 +0200 Subject: Btrfs: add sequence numbers to delayed refs Sequence numbers are needed to reconstruct the backrefs of a given extent to a certain point in time. The total set of backrefs consist of the set of backrefs recorded on disk plus the enqueued delayed refs for it that existed at that moment. This patch also adds a list that records all delayed refs which are currently in the process of being added. When walking all refs of an extent in btrfs_find_all_roots(), we freeze the current state of delayed refs, honor anythinh up to this point and prevent processing newer delayed refs to assert consistency. 
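As a minimal sketch of that freeze/walk/release pattern (the caller shown here is hypothetical; only struct seq_list and the btrfs_get_delayed_seq()/btrfs_put_delayed_seq() helpers added below are from this patch), a backref walker would pin the current sequence number for the duration of its walk:

static int walk_backrefs_at_frozen_seq(struct btrfs_trans_handle *trans)
{
	struct btrfs_delayed_ref_root *delayed_refs;
	struct seq_list elem;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	/* btrfs_get_delayed_seq() expects delayed_refs->lock to be held */
	spin_lock(&delayed_refs->lock);
	btrfs_get_delayed_seq(delayed_refs, &elem);
	spin_unlock(&delayed_refs->lock);

	/*
	 * ... walk the on-disk refs plus the delayed refs whose seq is not
	 * newer than elem.seq; the actual walk is omitted in this sketch ...
	 */

	/* btrfs_put_delayed_seq() takes the lock itself */
	btrfs_put_delayed_seq(delayed_refs, &elem);
	return ret;
}

Delayed refs that receive a sequence number while the entry sits on seq_head are then held back by btrfs_check_delayed_seq() until the walker drops it.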
Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/delayed-ref.c | 34 ++++++++++++++++++++++++ fs/btrfs/delayed-ref.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.c | 4 +++ 3 files changed, 108 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index babd37badb4..a405db0320e 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -101,6 +101,11 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2, return -1; if (ref1->type > ref2->type) return 1; + /* merging of sequenced refs is not allowed */ + if (ref1->seq < ref2->seq) + return -1; + if (ref1->seq > ref2->seq) + return 1; if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), @@ -209,6 +214,24 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, return 0; } +int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, + u64 seq) +{ + struct seq_list *elem; + + assert_spin_locked(&delayed_refs->lock); + if (list_empty(&delayed_refs->seq_head)) + return 0; + + elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list); + if (seq >= elem->seq) { + pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n", + seq, elem->seq, delayed_refs); + return 1; + } + return 0; +} + int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, struct list_head *cluster, u64 start) { @@ -438,6 +461,7 @@ static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info, ref->action = 0; ref->is_head = 1; ref->in_tree = 1; + ref->seq = 0; head_ref = btrfs_delayed_node_to_head(ref); head_ref->must_insert_reserved = must_insert_reserved; @@ -479,6 +503,7 @@ static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_tree_ref *full_ref; struct btrfs_delayed_ref_root *delayed_refs; + u64 seq = 0; if (action == BTRFS_ADD_DELAYED_EXTENT) action = BTRFS_ADD_DELAYED_REF; @@ -494,6 +519,10 @@ static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->is_head = 0; ref->in_tree = 1; + if (need_ref_seq(for_cow, ref_root)) + seq = inc_delayed_seq(delayed_refs); + ref->seq = seq; + full_ref = btrfs_delayed_node_to_tree_ref(ref); full_ref->parent = parent; full_ref->root = ref_root; @@ -534,6 +563,7 @@ static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_data_ref *full_ref; struct btrfs_delayed_ref_root *delayed_refs; + u64 seq = 0; if (action == BTRFS_ADD_DELAYED_EXTENT) action = BTRFS_ADD_DELAYED_REF; @@ -549,6 +579,10 @@ static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info, ref->is_head = 0; ref->in_tree = 1; + if (need_ref_seq(for_cow, ref_root)) + seq = inc_delayed_seq(delayed_refs); + ref->seq = seq; + full_ref = btrfs_delayed_node_to_data_ref(ref); full_ref->parent = parent; full_ref->root = ref_root; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index a5fb2bc8373..174416f7882 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -33,6 +33,9 @@ struct btrfs_delayed_ref_node { /* the size of the extent */ u64 num_bytes; + /* seq number to keep track of insertion order */ + u64 seq; + /* ref count on this data structure */ atomic_t refs; @@ -136,6 +139,20 @@ struct btrfs_delayed_ref_root { int flushing; u64 run_delayed_start; + + /* + * seq number of delayed refs. 
We need to know if a backref was being + * added before the currently processed ref or afterwards. + */ + u64 seq; + + /* + * seq_list holds a list of all seq numbers that are currently being + * added to the list. While walking backrefs (btrfs_find_all_roots, + * qgroups), which might take some time, no newer ref must be processed, + * as it might influence the outcome of the walk. + */ + struct list_head seq_head; }; static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) @@ -171,6 +188,59 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *head); int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, struct list_head *cluster, u64 search_start); + +struct seq_list { + struct list_head list; + u64 seq; +}; + +static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) +{ + assert_spin_locked(&delayed_refs->lock); + ++delayed_refs->seq; + return delayed_refs->seq; +} + +static inline void +btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, + struct seq_list *elem) +{ + assert_spin_locked(&delayed_refs->lock); + elem->seq = delayed_refs->seq; + list_add_tail(&elem->list, &delayed_refs->seq_head); +} + +static inline void +btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, + struct seq_list *elem) +{ + spin_lock(&delayed_refs->lock); + list_del(&elem->list); + spin_unlock(&delayed_refs->lock); +} + +int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, + u64 seq); + +/* + * delayed refs with a ref_seq > 0 must be held back during backref walking. + * this only applies to items in one of the fs-trees. for_cow items never need + * to be held back, so they won't get a ref_seq number. + */ +static inline int need_ref_seq(int for_cow, u64 rootid) +{ + if (for_cow) + return 0; + + if (rootid == BTRFS_FS_TREE_OBJECTID) + return 1; + + if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) + return 1; + + return 0; +} + /* * a node might live in a head or a regular ref, this lets you * test for the proper type to use. diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a2bfedcbcab..31a7393af64 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -36,6 +36,8 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) WARN_ON(atomic_read(&transaction->use_count) == 0); if (atomic_dec_and_test(&transaction->use_count)) { BUG_ON(!list_empty(&transaction->list)); + WARN_ON(transaction->delayed_refs.root.rb_node); + WARN_ON(!list_empty(&transaction->delayed_refs.seq_head)); memset(transaction, 0, sizeof(*transaction)); kmem_cache_free(btrfs_transaction_cachep, transaction); } @@ -108,8 +110,10 @@ loop: cur_trans->delayed_refs.num_heads = 0; cur_trans->delayed_refs.flushing = 0; cur_trans->delayed_refs.run_delayed_start = 0; + cur_trans->delayed_refs.seq = 1; spin_lock_init(&cur_trans->commit_lock); spin_lock_init(&cur_trans->delayed_refs.lock); + INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); -- cgit v1.2.3 From d1270cd91f308c9d22b2804720c36ccd32dbc35e Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 15:16:43 +0200 Subject: Btrfs: put back delayed refs that are too new When processing a delayed ref, first check if there are still old refs in the process of being added. If so, put this ref back to the tree. To avoid looping on this ref, choose a newer one in the next loop. 
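To make the "choose a newer one" step concrete (the bytenr values below are invented for illustration, they are not from this patch): with delayed ref heads queued at bytenrs 4096, 8192 and 16384, the reworked lookup that btrfs_find_ref_cluster() now issues behaves roughly like this:

	find_ref_head(&delayed_refs->root,  8192 + 1, &ref, 1);  /* finds the head at 16384 (next bigger) */
	find_ref_head(&delayed_refs->root, 16384 + 1, &ref, 1);  /* wraps around to the head at 4096      */
	find_ref_head(&delayed_refs->root,  4096,     &ref, 1);  /* finds the head at 4096 (exact match)  */

Searching from start + 1 is how the next pass moves past a head it just had to put back instead of looping on it.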
btrfs_find_ref_cluster has to take care of that. Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/delayed-ref.c | 43 +++++++++++++++++++++++++------------------ fs/btrfs/extent-tree.c | 27 ++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index a405db0320e..ee181989d44 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -155,16 +155,22 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, /* * find an head entry based on bytenr. This returns the delayed ref - * head if it was able to find one, or NULL if nothing was in that spot + * head if it was able to find one, or NULL if nothing was in that spot. + * If return_bigger is given, the next bigger entry is returned if no exact + * match is found. */ static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root, u64 bytenr, - struct btrfs_delayed_ref_node **last) + struct btrfs_delayed_ref_node **last, + int return_bigger) { - struct rb_node *n = root->rb_node; + struct rb_node *n; struct btrfs_delayed_ref_node *entry; - int cmp; + int cmp = 0; +again: + n = root->rb_node; + entry = NULL; while (n) { entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); WARN_ON(!entry->in_tree); @@ -187,6 +193,19 @@ static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root, else return entry; } + if (entry && return_bigger) { + if (cmp > 0) { + n = rb_next(&entry->rb_node); + if (!n) + n = rb_first(root); + entry = rb_entry(n, struct btrfs_delayed_ref_node, + rb_node); + bytenr = entry->bytenr; + return_bigger = 0; + goto again; + } + return entry; + } return NULL; } @@ -246,20 +265,8 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, node = rb_first(&delayed_refs->root); } else { ref = NULL; - find_ref_head(&delayed_refs->root, start, &ref); + find_ref_head(&delayed_refs->root, start + 1, &ref, 1); if (ref) { - struct btrfs_delayed_ref_node *tmp; - - node = rb_prev(&ref->rb_node); - while (node) { - tmp = rb_entry(node, - struct btrfs_delayed_ref_node, - rb_node); - if (tmp->bytenr < start) - break; - ref = tmp; - node = rb_prev(&ref->rb_node); - } node = &ref->rb_node; } else node = rb_first(&delayed_refs->root); @@ -748,7 +755,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) struct btrfs_delayed_ref_root *delayed_refs; delayed_refs = &trans->transaction->delayed_refs; - ref = find_ref_head(&delayed_refs->root, bytenr, NULL); + ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0); if (ref) return btrfs_delayed_node_to_head(ref); return NULL; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dc8b9a83459..bbcca12fbbb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2236,6 +2236,28 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, } } + /* + * locked_ref is the head node, so we have to go one + * node back for any delayed ref updates + */ + ref = select_delayed_ref(locked_ref); + + if (ref && ref->seq && + btrfs_check_delayed_seq(delayed_refs, ref->seq)) { + /* + * there are still refs with lower seq numbers in the + * process of being added. Don't run this ref yet. 
+ */ + list_del_init(&locked_ref->cluster); + mutex_unlock(&locked_ref->mutex); + locked_ref = NULL; + delayed_refs->num_heads_ready++; + spin_unlock(&delayed_refs->lock); + cond_resched(); + spin_lock(&delayed_refs->lock); + continue; + } + /* * record the must insert reserved flag before we * drop the spin lock. @@ -2246,11 +2268,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, extent_op = locked_ref->extent_op; locked_ref->extent_op = NULL; - /* - * locked_ref is the head node, so we have to go one - * node back for any delayed ref updates - */ - ref = select_delayed_ref(locked_ref); if (!ref) { /* All delayed refs have been processed, Go ahead * and send the head node to run_one_delayed_ref, -- cgit v1.2.3 From a168650c08300434e1456abe7b6451f1448230d3 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Mon, 12 Dec 2011 16:10:07 +0100 Subject: Btrfs: add waitqueue instead of doing busy waiting for more delayed refs Now that we may be holding back delayed refs for a limited period, we might end up having no runnable delayed refs. Without this commit, we'd do busy waiting in that thread until another (runnable) ref arives. Instead, we're detecting this situation and use a waitqueue, such that we only try to run more refs after a) another runnable ref was added or b) delayed refs are no longer held back Signed-off-by: Jan Schmidt --- fs/btrfs/delayed-ref.c | 8 +++++++ fs/btrfs/delayed-ref.h | 7 ++++++ fs/btrfs/extent-tree.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/transaction.c | 1 + 4 files changed, 74 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index ee181989d44..66e4f29505a 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -664,6 +664,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, num_bytes, parent, ref_root, level, action, for_cow); BUG_ON(ret); + if (!need_ref_seq(for_cow, ref_root) && + waitqueue_active(&delayed_refs->seq_wait)) + wake_up(&delayed_refs->seq_wait); spin_unlock(&delayed_refs->lock); return 0; } @@ -712,6 +715,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, num_bytes, parent, ref_root, owner, offset, action, for_cow); BUG_ON(ret); + if (!need_ref_seq(for_cow, ref_root) && + waitqueue_active(&delayed_refs->seq_wait)) + wake_up(&delayed_refs->seq_wait); spin_unlock(&delayed_refs->lock); return 0; } @@ -739,6 +745,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, extent_op->is_data); BUG_ON(ret); + if (waitqueue_active(&delayed_refs->seq_wait)) + wake_up(&delayed_refs->seq_wait); spin_unlock(&delayed_refs->lock); return 0; } diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 174416f7882..d8f244d9492 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -153,6 +153,12 @@ struct btrfs_delayed_ref_root { * as it might influence the outcome of the walk. */ struct list_head seq_head; + + /* + * when the only refs we have in the list must not be processed, we want + * to wait for more refs to show up or for the end of backref walking. 
+ */ + wait_queue_head_t seq_wait; }; static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) @@ -216,6 +222,7 @@ btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, { spin_lock(&delayed_refs->lock); list_del(&elem->list); + wake_up(&delayed_refs->seq_wait); spin_unlock(&delayed_refs->lock); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bbcca12fbbb..0a435e2e143 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2300,7 +2300,12 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ref->in_tree = 0; rb_erase(&ref->rb_node, &delayed_refs->root); delayed_refs->num_entries--; - + /* + * we modified num_entries, but as we're currently running + * delayed refs, skip + * wake_up(&delayed_refs->seq_wait); + * here. + */ spin_unlock(&delayed_refs->lock); ret = run_one_delayed_ref(trans, root, ref, extent_op, @@ -2317,6 +2322,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, return count; } + +static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, + unsigned long num_refs) +{ + struct list_head *first_seq = delayed_refs->seq_head.next; + + spin_unlock(&delayed_refs->lock); + pr_debug("waiting for more refs (num %ld, first %p)\n", + num_refs, first_seq); + wait_event(delayed_refs->seq_wait, + num_refs != delayed_refs->num_entries || + delayed_refs->seq_head.next != first_seq); + pr_debug("done waiting for more refs (num %ld, first %p)\n", + delayed_refs->num_entries, delayed_refs->seq_head.next); + spin_lock(&delayed_refs->lock); +} + /* * this starts processing the delayed reference count updates and * extent insertions we have queued up so far. count can be @@ -2332,8 +2354,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref; struct list_head cluster; int ret; + u64 delayed_start; int run_all = count == (unsigned long)-1; int run_most = 0; + unsigned long num_refs = 0; + int consider_waiting; if (root == root->fs_info->extent_root) root = root->fs_info->tree_root; @@ -2341,6 +2366,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, delayed_refs = &trans->transaction->delayed_refs; INIT_LIST_HEAD(&cluster); again: + consider_waiting = 0; spin_lock(&delayed_refs->lock); if (count == 0) { count = delayed_refs->num_entries * 2; @@ -2357,11 +2383,35 @@ again: * of refs to process starting at the first one we are able to * lock */ + delayed_start = delayed_refs->run_delayed_start; ret = btrfs_find_ref_cluster(trans, &cluster, delayed_refs->run_delayed_start); if (ret) break; + if (delayed_start >= delayed_refs->run_delayed_start) { + if (consider_waiting == 0) { + /* + * btrfs_find_ref_cluster looped. let's do one + * more cycle. if we don't run any delayed ref + * during that cycle (because we can't because + * all of them are blocked) and if the number of + * refs doesn't change, we avoid busy waiting. + */ + consider_waiting = 1; + num_refs = delayed_refs->num_entries; + } else { + wait_for_more_refs(delayed_refs, num_refs); + /* + * after waiting, things have changed. we + * dropped the lock and someone else might have + * run some refs, built new clusters and so on. + * therefore, we restart staleness detection. 
+ */ + consider_waiting = 0; + } + } + ret = run_clustered_refs(trans, root, &cluster); BUG_ON(ret < 0); @@ -2369,6 +2419,11 @@ again: if (count == 0) break; + + if (ret || delayed_refs->run_delayed_start == 0) { + /* refs were run, let's reset staleness detection */ + consider_waiting = 0; + } } if (run_all) { @@ -4933,6 +4988,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, rb_erase(&head->node.rb_node, &delayed_refs->root); delayed_refs->num_entries--; + if (waitqueue_active(&delayed_refs->seq_wait)) + wake_up(&delayed_refs->seq_wait); /* * we don't take a ref on the node because we're removing it from the diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 31a7393af64..04c5c7c2c32 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -111,6 +111,7 @@ loop: cur_trans->delayed_refs.flushing = 0; cur_trans->delayed_refs.run_delayed_start = 0; cur_trans->delayed_refs.seq = 1; + init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); spin_lock_init(&cur_trans->commit_lock); spin_lock_init(&cur_trans->delayed_refs.lock); INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); -- cgit v1.2.3 From 8da6d5815c592b713ecaf4f4f8b631f8359c96c4 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 23 Nov 2011 18:55:04 +0100 Subject: Btrfs: added btrfs_find_all_roots() This function gets a byte number (a data extent), collects all the leafs pointing to it and walks up the trees to find all fs roots pointing to those leafs. It also returns the list of all leafs pointing to that extent. It does proper locking for the involved trees, can be used on busy file systems and honors delayed refs. Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/backref.c | 783 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/backref.h | 5 + 2 files changed, 788 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 22c64fff1bd..03c30a1836f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -19,6 +19,9 @@ #include "ctree.h" #include "disk-io.h" #include "backref.h" +#include "ulist.h" +#include "transaction.h" +#include "delayed-ref.h" struct __data_ref { struct list_head list; @@ -32,6 +35,786 @@ struct __shared_ref { u64 disk_byte; }; +/* + * this structure records all encountered refs on the way up to the root + */ +struct __prelim_ref { + struct list_head list; + u64 root_id; + struct btrfs_key key; + int level; + int count; + u64 parent; + u64 wanted_disk_byte; +}; + +static int __add_prelim_ref(struct list_head *head, u64 root_id, + struct btrfs_key *key, int level, u64 parent, + u64 wanted_disk_byte, int count) +{ + struct __prelim_ref *ref; + + /* in case we're adding delayed refs, we're holding the refs spinlock */ + ref = kmalloc(sizeof(*ref), GFP_ATOMIC); + if (!ref) + return -ENOMEM; + + ref->root_id = root_id; + if (key) + ref->key = *key; + else + memset(&ref->key, 0, sizeof(ref->key)); + + ref->level = level; + ref->count = count; + ref->parent = parent; + ref->wanted_disk_byte = wanted_disk_byte; + list_add_tail(&ref->list, head); + + return 0; +} + +static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, + struct ulist *parents, + struct extent_buffer *eb, int level, + u64 wanted_objectid, u64 wanted_disk_byte) +{ + int ret; + int slot; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 disk_byte; + +add_parent: + ret = ulist_add(parents, eb->start, 0, GFP_NOFS); + if (ret < 0) + return ret; + + if (level != 0) + return 0; + + /* + * if the current 
leaf is full with EXTENT_DATA items, we must + * check the next one if that holds a reference as well. + * ref->count cannot be used to skip this check. + * repeat this until we don't find any additional EXTENT_DATA items. + */ + while (1) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + if (ret) + return 0; + + eb = path->nodes[0]; + for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) { + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.objectid != wanted_objectid || + key.type != BTRFS_EXTENT_DATA_KEY) + return 0; + fi = btrfs_item_ptr(eb, slot, + struct btrfs_file_extent_item); + disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); + if (disk_byte == wanted_disk_byte) + goto add_parent; + } + } + + return 0; +} + +/* + * resolve an indirect backref in the form (root_id, key, level) + * to a logical address + */ +static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, + struct __prelim_ref *ref, + struct ulist *parents) +{ + struct btrfs_path *path; + struct btrfs_root *root; + struct btrfs_key root_key; + struct btrfs_key key = {0}; + struct extent_buffer *eb; + int ret = 0; + int root_level; + int level = ref->level; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + root_key.objectid = ref->root_id; + root_key.type = BTRFS_ROOT_ITEM_KEY; + root_key.offset = (u64)-1; + root = btrfs_read_fs_root_no_name(fs_info, &root_key); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto out; + } + + rcu_read_lock(); + root_level = btrfs_header_level(root->node); + rcu_read_unlock(); + + if (root_level + 1 == level) + goto out; + + path->lowest_level = level; + ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0); + pr_debug("search slot in root %llu (level %d, ref count %d) returned " + "%d for key (%llu %u %llu)\n", + (unsigned long long)ref->root_id, level, ref->count, ret, + (unsigned long long)ref->key.objectid, ref->key.type, + (unsigned long long)ref->key.offset); + if (ret < 0) + goto out; + + eb = path->nodes[level]; + if (!eb) { + WARN_ON(1); + ret = 1; + goto out; + } + + if (level == 0) { + if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(root, path); + if (ret) + goto out; + eb = path->nodes[0]; + } + + btrfs_item_key_to_cpu(eb, &key, path->slots[0]); + } + + /* the last two parameters will only be used for level == 0 */ + ret = add_all_parents(root, path, parents, eb, level, key.objectid, + ref->wanted_disk_byte); +out: + btrfs_free_path(path); + return ret; +} + +/* + * resolve all indirect backrefs from the list + */ +static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, + struct list_head *head) +{ + int err; + int ret = 0; + struct __prelim_ref *ref; + struct __prelim_ref *ref_safe; + struct __prelim_ref *new_ref; + struct ulist *parents; + struct ulist_node *node; + + parents = ulist_alloc(GFP_NOFS); + if (!parents) + return -ENOMEM; + + /* + * _safe allows us to insert directly after the current item without + * iterating over the newly inserted items. + * we're also allowed to re-assign ref during iteration. + */ + list_for_each_entry_safe(ref, ref_safe, head, list) { + if (ref->parent) /* already direct */ + continue; + if (ref->count == 0) + continue; + err = __resolve_indirect_ref(fs_info, ref, parents); + if (err) { + if (ret == 0) + ret = err; + continue; + } + + /* we put the first parent into the ref at hand */ + node = ulist_next(parents, NULL); + ref->parent = node ? 
node->val : 0; + + /* additional parents require new refs being added here */ + while ((node = ulist_next(parents, node))) { + new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); + if (!new_ref) { + ret = -ENOMEM; + break; + } + memcpy(new_ref, ref, sizeof(*ref)); + new_ref->parent = node->val; + list_add(&new_ref->list, &ref->list); + } + ulist_reinit(parents); + } + + ulist_free(parents); + return ret; +} + +/* + * merge two lists of backrefs and adjust counts accordingly + * + * mode = 1: merge identical keys, if key is set + * mode = 2: merge identical parents + */ +static int __merge_refs(struct list_head *head, int mode) +{ + struct list_head *pos1; + + list_for_each(pos1, head) { + struct list_head *n2; + struct list_head *pos2; + struct __prelim_ref *ref1; + + ref1 = list_entry(pos1, struct __prelim_ref, list); + + if (mode == 1 && ref1->key.type == 0) + continue; + for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; + pos2 = n2, n2 = pos2->next) { + struct __prelim_ref *ref2; + + ref2 = list_entry(pos2, struct __prelim_ref, list); + + if (mode == 1) { + if (memcmp(&ref1->key, &ref2->key, + sizeof(ref1->key)) || + ref1->level != ref2->level || + ref1->root_id != ref2->root_id) + continue; + ref1->count += ref2->count; + } else { + if (ref1->parent != ref2->parent) + continue; + ref1->count += ref2->count; + } + list_del(&ref2->list); + kfree(ref2); + } + + } + return 0; +} + +/* + * add all currently queued delayed refs from this head whose seq nr is + * smaller or equal that seq to the list + */ +static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, + struct btrfs_key *info_key, + struct list_head *prefs) +{ + struct btrfs_delayed_extent_op *extent_op = head->extent_op; + struct rb_node *n = &head->node.rb_node; + int sgn; + int ret; + + if (extent_op && extent_op->update_key) + btrfs_disk_key_to_cpu(info_key, &extent_op->key); + + while ((n = rb_prev(n))) { + struct btrfs_delayed_ref_node *node; + node = rb_entry(n, struct btrfs_delayed_ref_node, + rb_node); + if (node->bytenr != head->node.bytenr) + break; + WARN_ON(node->is_head); + + if (node->seq > seq) + continue; + + switch (node->action) { + case BTRFS_ADD_DELAYED_EXTENT: + case BTRFS_UPDATE_DELAYED_HEAD: + WARN_ON(1); + continue; + case BTRFS_ADD_DELAYED_REF: + sgn = 1; + break; + case BTRFS_DROP_DELAYED_REF: + sgn = -1; + break; + default: + BUG_ON(1); + } + switch (node->type) { + case BTRFS_TREE_BLOCK_REF_KEY: { + struct btrfs_delayed_tree_ref *ref; + + ref = btrfs_delayed_node_to_tree_ref(node); + ret = __add_prelim_ref(prefs, ref->root, info_key, + ref->level + 1, 0, node->bytenr, + node->ref_mod * sgn); + break; + } + case BTRFS_SHARED_BLOCK_REF_KEY: { + struct btrfs_delayed_tree_ref *ref; + + ref = btrfs_delayed_node_to_tree_ref(node); + ret = __add_prelim_ref(prefs, ref->root, info_key, + ref->level + 1, ref->parent, + node->bytenr, + node->ref_mod * sgn); + break; + } + case BTRFS_EXTENT_DATA_REF_KEY: { + struct btrfs_delayed_data_ref *ref; + struct btrfs_key key; + + ref = btrfs_delayed_node_to_data_ref(node); + + key.objectid = ref->objectid; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = ref->offset; + ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0, + node->bytenr, + node->ref_mod * sgn); + break; + } + case BTRFS_SHARED_DATA_REF_KEY: { + struct btrfs_delayed_data_ref *ref; + struct btrfs_key key; + + ref = btrfs_delayed_node_to_data_ref(node); + + key.objectid = ref->objectid; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = ref->offset; + ret = __add_prelim_ref(prefs, 
ref->root, &key, 0, + ref->parent, node->bytenr, + node->ref_mod * sgn); + break; + } + default: + WARN_ON(1); + } + BUG_ON(ret); + } + + return 0; +} + +/* + * add all inline backrefs for bytenr to the list + */ +static int __add_inline_refs(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, u64 bytenr, + struct btrfs_key *info_key, int *info_level, + struct list_head *prefs) +{ + int ret; + int slot; + struct extent_buffer *leaf; + struct btrfs_key key; + unsigned long ptr; + unsigned long end; + struct btrfs_extent_item *ei; + u64 flags; + u64 item_size; + + /* + * enumerate all inline refs + */ + leaf = path->nodes[0]; + slot = path->slots[0] - 1; + + item_size = btrfs_item_size_nr(leaf, slot); + BUG_ON(item_size < sizeof(*ei)); + + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + flags = btrfs_extent_flags(leaf, ei); + + ptr = (unsigned long)(ei + 1); + end = (unsigned long)ei + item_size; + + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + struct btrfs_tree_block_info *info; + struct btrfs_disk_key disk_key; + + info = (struct btrfs_tree_block_info *)ptr; + *info_level = btrfs_tree_block_level(leaf, info); + btrfs_tree_block_key(leaf, info, &disk_key); + btrfs_disk_key_to_cpu(info_key, &disk_key); + ptr += sizeof(struct btrfs_tree_block_info); + BUG_ON(ptr > end); + } else { + BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); + } + + while (ptr < end) { + struct btrfs_extent_inline_ref *iref; + u64 offset; + int type; + + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + offset = btrfs_extent_inline_ref_offset(leaf, iref); + + switch (type) { + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = __add_prelim_ref(prefs, 0, info_key, + *info_level + 1, offset, + bytenr, 1); + break; + case BTRFS_SHARED_DATA_REF_KEY: { + struct btrfs_shared_data_ref *sdref; + int count; + + sdref = (struct btrfs_shared_data_ref *)(iref + 1); + count = btrfs_shared_data_ref_count(leaf, sdref); + ret = __add_prelim_ref(prefs, 0, NULL, 0, offset, + bytenr, count); + break; + } + case BTRFS_TREE_BLOCK_REF_KEY: + ret = __add_prelim_ref(prefs, offset, info_key, + *info_level + 1, 0, bytenr, 1); + break; + case BTRFS_EXTENT_DATA_REF_KEY: { + struct btrfs_extent_data_ref *dref; + int count; + u64 root; + + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + count = btrfs_extent_data_ref_count(leaf, dref); + key.objectid = btrfs_extent_data_ref_objectid(leaf, + dref); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = btrfs_extent_data_ref_offset(leaf, dref); + root = btrfs_extent_data_ref_root(leaf, dref); + ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr, + count); + break; + } + default: + WARN_ON(1); + } + BUG_ON(ret); + ptr += btrfs_extent_inline_ref_size(type); + } + + return 0; +} + +/* + * add all non-inline backrefs for bytenr to the list + */ +static int __add_keyed_refs(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, u64 bytenr, + struct btrfs_key *info_key, int info_level, + struct list_head *prefs) +{ + struct btrfs_root *extent_root = fs_info->extent_root; + int ret; + int slot; + struct extent_buffer *leaf; + struct btrfs_key key; + + while (1) { + ret = btrfs_next_item(extent_root, path); + if (ret < 0) + break; + if (ret) { + ret = 0; + break; + } + + slot = path->slots[0]; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + + if (key.objectid != bytenr) + break; + if (key.type < BTRFS_TREE_BLOCK_REF_KEY) + continue; + if (key.type > BTRFS_SHARED_DATA_REF_KEY) + break; + + switch (key.type) { + case 
BTRFS_SHARED_BLOCK_REF_KEY: + ret = __add_prelim_ref(prefs, 0, info_key, + info_level + 1, key.offset, + bytenr, 1); + break; + case BTRFS_SHARED_DATA_REF_KEY: { + struct btrfs_shared_data_ref *sdref; + int count; + + sdref = btrfs_item_ptr(leaf, slot, + struct btrfs_shared_data_ref); + count = btrfs_shared_data_ref_count(leaf, sdref); + ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset, + bytenr, count); + break; + } + case BTRFS_TREE_BLOCK_REF_KEY: + ret = __add_prelim_ref(prefs, key.offset, info_key, + info_level + 1, 0, bytenr, 1); + break; + case BTRFS_EXTENT_DATA_REF_KEY: { + struct btrfs_extent_data_ref *dref; + int count; + u64 root; + + dref = btrfs_item_ptr(leaf, slot, + struct btrfs_extent_data_ref); + count = btrfs_extent_data_ref_count(leaf, dref); + key.objectid = btrfs_extent_data_ref_objectid(leaf, + dref); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = btrfs_extent_data_ref_offset(leaf, dref); + root = btrfs_extent_data_ref_root(leaf, dref); + ret = __add_prelim_ref(prefs, root, &key, 0, 0, + bytenr, count); + break; + } + default: + WARN_ON(1); + } + BUG_ON(ret); + } + + return ret; +} + +/* + * this adds all existing backrefs (inline backrefs, backrefs and delayed + * refs) for the given bytenr to the refs list, merges duplicates and resolves + * indirect refs to their parent bytenr. + * When roots are found, they're added to the roots list + * + * FIXME some caching might speed things up + */ +static int find_parent_nodes(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 seq, struct ulist *refs, struct ulist *roots) +{ + struct btrfs_key key; + struct btrfs_path *path; + struct btrfs_key info_key = { 0 }; + struct btrfs_delayed_ref_root *delayed_refs = NULL; + struct btrfs_delayed_ref_head *head = NULL; + int info_level = 0; + int ret; + struct list_head prefs_delayed; + struct list_head prefs; + struct __prelim_ref *ref; + + INIT_LIST_HEAD(&prefs); + INIT_LIST_HEAD(&prefs_delayed); + + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* + * grab both a lock on the path and a lock on the delayed ref head. 
+ * We need both to get a consistent picture of how the refs look + * at a specified point in time + */ +again: + ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + + /* + * look if there are updates for this ref queued and lock the head + */ + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + head = btrfs_find_delayed_ref_head(trans, bytenr); + if (head) { + if (!mutex_trylock(&head->mutex)) { + atomic_inc(&head->node.refs); + spin_unlock(&delayed_refs->lock); + + btrfs_release_path(path); + + /* + * Mutex was contended, block until it's + * released and try again + */ + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref(&head->node); + goto again; + } + ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed); + if (ret) + goto out; + } + spin_unlock(&delayed_refs->lock); + + if (path->slots[0]) { + struct extent_buffer *leaf; + int slot; + + leaf = path->nodes[0]; + slot = path->slots[0] - 1; + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY) { + ret = __add_inline_refs(fs_info, path, bytenr, + &info_key, &info_level, &prefs); + if (ret) + goto out; + ret = __add_keyed_refs(fs_info, path, bytenr, &info_key, + info_level, &prefs); + if (ret) + goto out; + } + } + btrfs_release_path(path); + + /* + * when adding the delayed refs above, the info_key might not have + * been known yet. Go over the list and replace the missing keys + */ + list_for_each_entry(ref, &prefs_delayed, list) { + if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0) + memcpy(&ref->key, &info_key, sizeof(ref->key)); + } + list_splice_init(&prefs_delayed, &prefs); + + ret = __merge_refs(&prefs, 1); + if (ret) + goto out; + + ret = __resolve_indirect_refs(fs_info, &prefs); + if (ret) + goto out; + + ret = __merge_refs(&prefs, 2); + if (ret) + goto out; + + while (!list_empty(&prefs)) { + ref = list_first_entry(&prefs, struct __prelim_ref, list); + list_del(&ref->list); + if (ref->count < 0) + WARN_ON(1); + if (ref->count && ref->root_id && ref->parent == 0) { + /* no parent == root of tree */ + ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); + BUG_ON(ret < 0); + } + if (ref->count && ref->parent) { + ret = ulist_add(refs, ref->parent, 0, GFP_NOFS); + BUG_ON(ret < 0); + } + kfree(ref); + } + +out: + if (head) + mutex_unlock(&head->mutex); + btrfs_free_path(path); + while (!list_empty(&prefs)) { + ref = list_first_entry(&prefs, struct __prelim_ref, list); + list_del(&ref->list); + kfree(ref); + } + while (!list_empty(&prefs_delayed)) { + ref = list_first_entry(&prefs_delayed, struct __prelim_ref, + list); + list_del(&ref->list); + kfree(ref); + } + + return ret; +} + +/* + * Finds all leafs with a reference to the specified combination of bytenr and + * offset. key_list_head will point to a list of corresponding keys (caller must + * free each list element). The leafs will be stored in the leafs ulist, which + * must be freed with ulist_free. 
+ * + * returns 0 on success, <0 on error + */ +static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 num_bytes, u64 seq, struct ulist **leafs) +{ + struct ulist *tmp; + int ret; + + tmp = ulist_alloc(GFP_NOFS); + if (!tmp) + return -ENOMEM; + *leafs = ulist_alloc(GFP_NOFS); + if (!*leafs) { + ulist_free(tmp); + return -ENOMEM; + } + + ret = find_parent_nodes(trans, fs_info, bytenr, seq, *leafs, tmp); + ulist_free(tmp); + + if (ret < 0 && ret != -ENOENT) { + ulist_free(*leafs); + return ret; + } + + return 0; +} + +/* + * walk all backrefs for a given extent to find all roots that reference this + * extent. Walking a backref means finding all extents that reference this + * extent and in turn walk the backrefs of those, too. Naturally this is a + * recursive process, but here it is implemented in an iterative fashion: We + * find all referencing extents for the extent in question and put them on a + * list. In turn, we find all referencing extents for those, further appending + * to the list. The way we iterate the list allows adding more elements after + * the current while iterating. The process stops when we reach the end of the + * list. Found roots are added to the roots list. + * + * returns 0 on success, < 0 on error. + */ +int btrfs_find_all_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 num_bytes, u64 seq, struct ulist **roots) +{ + struct ulist *tmp; + struct ulist_node *node = NULL; + int ret; + + tmp = ulist_alloc(GFP_NOFS); + if (!tmp) + return -ENOMEM; + *roots = ulist_alloc(GFP_NOFS); + if (!*roots) { + ulist_free(tmp); + return -ENOMEM; + } + + while (1) { + ret = find_parent_nodes(trans, fs_info, bytenr, seq, + tmp, *roots); + if (ret < 0 && ret != -ENOENT) { + ulist_free(tmp); + ulist_free(*roots); + return ret; + } + node = ulist_next(tmp, node); + if (!node) + break; + bytenr = node->val; + } + + ulist_free(tmp); + return 0; +} + + static int __inode_info(u64 inum, u64 ioff, u8 key_type, struct btrfs_root *fs_root, struct btrfs_path *path, struct btrfs_key *found_key) diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 92618837cb8..d00dfa9ca93 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -20,6 +20,7 @@ #define __BTRFS_BACKREF__ #include "ioctl.h" +#include "ulist.h" struct inode_fs_paths { struct btrfs_path *btrfs_path; @@ -54,6 +55,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); +int btrfs_find_all_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 num_bytes, u64 seq, struct ulist **roots); + struct btrfs_data_container *init_data_container(u32 total_bytes); struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, struct btrfs_path *path); -- cgit v1.2.3 From 19c5246d251640ac76daa4d34165af78c64b1454 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 4 Jan 2012 17:09:44 -0500 Subject: ext4: add new online resize interface This patch adds new online resize interface, whose input argument is a 64-bit integer indicating how many blocks there are in the resized fs. In new resize impelmentation, all work like allocating group tables are done by kernel side, so the new resize interface can support flex_bg feature and prepares ground for suppoting resize with features like bigalloc and exclude bitmap. Besides these, user-space tools just passes in the new number of blocks. 
We delay initializing the bitmaps and inode tables of added groups if possible and add multi groups (a flex groups) each time, so new resize is very fast like mkfs. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- Documentation/filesystems/ext4.txt | 7 ++ fs/ext4/ext4.h | 2 + fs/ext4/ioctl.c | 57 ++++++++++++ fs/ext4/resize.c | 177 +++++++++++++++++++++++++++++++++++++ 4 files changed, 243 insertions(+) (limited to 'fs') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 4917cf24a5e..10ec4639f15 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -581,6 +581,13 @@ Table of Ext4 specific ioctls behaviour may change in the future as it is not necessary and has been done this way only for sake of simplicity. + + EXT4_IOC_RESIZE_FS Resize the filesystem to a new size. The number + of blocks of resized filesystem is passed in via + 64 bit integer argument. The kernel allocates + bitmaps and inode table, the userspace tool thus + just passes the new number of blocks. + .............................................................................. References diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 05058e2b7f4..4bc0e82a905 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -583,6 +583,7 @@ enum { /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) +#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* @@ -1929,6 +1930,7 @@ extern int ext4_group_add(struct super_block *sb, extern int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_fsblk_t n_blocks_count); +extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ extern void *ext4_kvmalloc(size_t size, gfp_t flags); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index ff1aab7cd6e..c1a98804a38 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -18,6 +18,8 @@ #include "ext4_jbd2.h" #include "ext4.h" +#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1) + long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_dentry->d_inode; @@ -329,6 +331,60 @@ mext_out: return err; } + case EXT4_IOC_RESIZE_FS: { + ext4_fsblk_t n_blocks_count; + struct super_block *sb = inode->i_sb; + int err = 0, err2 = 0; + + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + ext4_msg(sb, KERN_ERR, + "Online resizing not (yet) supported with bigalloc"); + return -EOPNOTSUPP; + } + + if (EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_META_BG)) { + ext4_msg(sb, KERN_ERR, + "Online resizing not (yet) supported with meta_bg"); + return -EOPNOTSUPP; + } + + if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, + sizeof(__u64))) { + return -EFAULT; + } + + if (n_blocks_count > MAX_32_NUM && + !EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_64BIT)) { + ext4_msg(sb, KERN_ERR, + "File system only supports 32-bit block numbers"); + return -EOPNOTSUPP; + } + + err = ext4_resize_begin(sb); + if (err) + return err; + + err = mnt_want_write(filp->f_path.mnt); + if (err) + goto resizefs_out; + + err = ext4_resize_fs(sb, n_blocks_count); + if (EXT4_SB(sb)->s_journal) { + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); + err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + } + if (err == 0) + err = err2; + 
mnt_drop_write(filp->f_path.mnt); +resizefs_out: + ext4_resize_end(sb); + return err; + } + case FITRIM: { struct request_queue *q = bdev_get_queue(sb->s_bdev); @@ -427,6 +483,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } case EXT4_IOC_MOVE_EXT: case FITRIM: + case EXT4_IOC_RESIZE_FS: break; default: return -ENOIOCTLCMD; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index dac23561f3e..5fe2a013ee6 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1430,6 +1430,70 @@ exit: return err; } +static int ext4_setup_next_flex_gd(struct super_block *sb, + struct ext4_new_flex_group_data *flex_gd, + ext4_fsblk_t n_blocks_count, + unsigned long flexbg_size) +{ + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + struct ext4_new_group_data *group_data = flex_gd->groups; + ext4_fsblk_t o_blocks_count; + ext4_group_t n_group; + ext4_group_t group; + ext4_group_t last_group; + ext4_grpblk_t last; + ext4_grpblk_t blocks_per_group; + unsigned long i; + + blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb); + + o_blocks_count = ext4_blocks_count(es); + + if (o_blocks_count == n_blocks_count) + return 0; + + ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); + BUG_ON(last); + ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last); + + last_group = group | (flexbg_size - 1); + if (last_group > n_group) + last_group = n_group; + + flex_gd->count = last_group - group + 1; + + for (i = 0; i < flex_gd->count; i++) { + int overhead; + + group_data[i].group = group + i; + group_data[i].blocks_count = blocks_per_group; + overhead = ext4_bg_has_super(sb, group + i) ? + (1 + ext4_bg_num_gdb(sb, group + i) + + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + group_data[i].free_blocks_count = blocks_per_group - overhead; + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | + EXT4_BG_INODE_UNINIT; + else + flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; + } + + if (last_group == n_group && + EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + /* We need to initialize block bitmap of last group. */ + flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; + + if ((last_group == n_group) && (last != blocks_per_group - 1)) { + group_data[i - 1].blocks_count = last + 1; + group_data[i - 1].free_blocks_count -= blocks_per_group- + last - 1; + } + + return 1; +} + /* Add group descriptor data to an existing or new group descriptor block. 
* Ensure we handle all possible error conditions _before_ we start modifying * the filesystem, because we cannot abort the transaction and not have it @@ -1827,3 +1891,116 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, exit_put: return err; } /* ext4_group_extend */ + +/* + * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count + * + * @sb: super block of the fs to be resized + * @n_blocks_count: the number of blocks resides in the resized fs + */ +int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) +{ + struct ext4_new_flex_group_data *flex_gd = NULL; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + struct buffer_head *bh; + struct inode *resize_inode; + ext4_fsblk_t o_blocks_count; + ext4_group_t o_group; + ext4_group_t n_group; + ext4_grpblk_t offset; + unsigned long n_desc_blocks; + unsigned long o_desc_blocks; + unsigned long desc_blocks; + int err = 0, flexbg_size = 1; + + o_blocks_count = ext4_blocks_count(es); + + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu " + "upto %llu blocks\n", o_blocks_count, n_blocks_count); + + if (n_blocks_count < o_blocks_count) { + /* On-line shrinking not supported */ + ext4_warning(sb, "can't shrink FS - resize aborted"); + return -EINVAL; + } + + if (n_blocks_count == o_blocks_count) + /* Nothing need to do */ + return 0; + + ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); + ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset); + + n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / + EXT4_DESC_PER_BLOCK(sb); + o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / + EXT4_DESC_PER_BLOCK(sb); + desc_blocks = n_desc_blocks - o_desc_blocks; + + if (desc_blocks && + (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) || + le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) { + ext4_warning(sb, "No reserved GDT blocks, can't resize"); + return -EPERM; + } + + resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); + if (IS_ERR(resize_inode)) { + ext4_warning(sb, "Error opening resize inode"); + return PTR_ERR(resize_inode); + } + + /* See if the device is actually as big as what was requested */ + bh = sb_bread(sb, n_blocks_count - 1); + if (!bh) { + ext4_warning(sb, "can't read last block, resize aborted"); + return -ENOSPC; + } + brelse(bh); + + if (offset != 0) { + /* extend the last group */ + ext4_grpblk_t add; + add = EXT4_BLOCKS_PER_GROUP(sb) - offset; + err = ext4_group_extend_no_check(sb, o_blocks_count, add); + if (err) + goto out; + } + + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && + es->s_log_groups_per_flex) + flexbg_size = 1 << es->s_log_groups_per_flex; + + o_blocks_count = ext4_blocks_count(es); + if (o_blocks_count == n_blocks_count) + goto out; + + flex_gd = alloc_flex_gd(flexbg_size); + if (flex_gd == NULL) { + err = -ENOMEM; + goto out; + } + + /* Add flex groups. Note that a regular group is a + * flex group with 1 group. 
+ */ + while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, + flexbg_size)) { + ext4_alloc_group_tables(sb, flex_gd, flexbg_size); + err = ext4_flex_group_add(sb, resize_inode, flex_gd); + if (unlikely(err)) + break; + } + +out: + if (flex_gd) + free_flex_gd(flex_gd); + + iput(resize_inode); + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu " + "upto %llu blocks\n", o_blocks_count, n_blocks_count); + return err; +} -- cgit v1.2.3 From d89651c8e222b2d2797bf66d4eb7064459f4f4f4 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 4 Jan 2012 17:09:48 -0500 Subject: ext4: let ext4_group_extend() use common code ext4_group_extend_no_check() is moved out from ext4_group_extend(), this patch lets ext4_group_extend() call ext4_group_extentd_no_check() instead. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 41 ++--------------------------------------- 1 file changed, 2 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5fe2a013ee6..eba706d9276 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1795,8 +1795,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_grpblk_t last; ext4_grpblk_t add; struct buffer_head *bh; - handle_t *handle; - int err, err2; + int err; ext4_group_t group; o_blocks_count = ext4_blocks_count(es); @@ -1852,43 +1851,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, } brelse(bh); - /* We will update the superblock, one block bitmap, and - * one group descriptor via ext4_free_blocks(). - */ - handle = ext4_journal_start_sb(sb, 3); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - ext4_warning(sb, "error %d on journal start", err); - goto exit_put; - } - - if ((err = ext4_journal_get_write_access(handle, - EXT4_SB(sb)->s_sbh))) { - ext4_warning(sb, "error %d on journal write access", err); - ext4_journal_stop(handle); - goto exit_put; - } - ext4_blocks_count_set(es, o_blocks_count + add); - ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, - o_blocks_count + add); - /* We add the blocks to the bitmap and set the group need init bit */ - err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); - ext4_handle_dirty_super(handle, sb); - ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, - o_blocks_count + add); - err2 = ext4_journal_stop(handle); - if (!err && err2) - err = err2; - - if (err) - goto exit_put; - - if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", - ext4_blocks_count(es)); - update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block)); -exit_put: + err = ext4_group_extend_no_check(sb, o_blocks_count, add); return err; } /* ext4_group_extend */ -- cgit v1.2.3 From 61f296cc49751f1dc992039229d12b0de7e0c2ae Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 4 Jan 2012 17:09:50 -0500 Subject: ext4: let ext4_group_add() use common code This patch lets ext4_group_add() call ext4_flex_group_add(). Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 309 ++----------------------------------------------------- 1 file changed, 10 insertions(+), 299 deletions(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index eba706d9276..f9d948f0eb8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -594,137 +594,6 @@ out: return err; } -/* - * Set up the block and inode bitmaps, and the inode table for the new group. 
- * This doesn't need to be part of the main transaction, since we are only - * changing blocks outside the actual filesystem. We still do journaling to - * ensure the recovery is correct in case of a failure just after resize. - * If any part of this fails, we simply abort the resize. - */ -static int setup_new_group_blocks(struct super_block *sb, - struct ext4_new_group_data *input) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); - int reserved_gdb = ext4_bg_has_super(sb, input->group) ? - le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; - unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group); - struct buffer_head *bh; - handle_t *handle; - ext4_fsblk_t block; - ext4_grpblk_t bit; - int i; - int err = 0, err2; - - /* This transaction may be extended/restarted along the way */ - handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); - - if (IS_ERR(handle)) - return PTR_ERR(handle); - - BUG_ON(input->group != sbi->s_groups_count); - - /* Copy all of the GDT blocks into the backup in this group */ - for (i = 0, bit = 1, block = start + 1; - i < gdblocks; i++, block++, bit++) { - struct buffer_head *gdb; - - ext4_debug("update backup group %#04llx (+%d)\n", block, bit); - err = extend_or_restart_transaction(handle, 1); - if (err) - goto exit_journal; - - gdb = sb_getblk(sb, block); - if (!gdb) { - err = -EIO; - goto exit_journal; - } - if ((err = ext4_journal_get_write_access(handle, gdb))) { - brelse(gdb); - goto exit_journal; - } - memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); - set_buffer_uptodate(gdb); - err = ext4_handle_dirty_metadata(handle, NULL, gdb); - if (unlikely(err)) { - brelse(gdb); - goto exit_journal; - } - brelse(gdb); - } - - /* Zero out all of the reserved backup group descriptor table blocks */ - ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", - block, sbi->s_itb_per_group); - err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, - GFP_NOFS); - if (err) - goto exit_journal; - - err = extend_or_restart_transaction(handle, 2); - if (err) - goto exit_journal; - - bh = bclean(handle, sb, input->block_bitmap); - if (IS_ERR(bh)) { - err = PTR_ERR(bh); - goto exit_journal; - } - - if (ext4_bg_has_super(sb, input->group)) { - ext4_debug("mark backup group tables %#04llx (+0)\n", start); - ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); - } - - ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, - input->block_bitmap - start); - ext4_set_bit(input->block_bitmap - start, bh->b_data); - ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, - input->inode_bitmap - start); - ext4_set_bit(input->inode_bitmap - start, bh->b_data); - - /* Zero out all of the inode table blocks */ - block = input->inode_table; - ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", - block, sbi->s_itb_per_group); - err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); - if (err) - goto exit_bh; - ext4_set_bits(bh->b_data, input->inode_table - start, - sbi->s_itb_per_group); - - - ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, - bh->b_data); - err = ext4_handle_dirty_metadata(handle, NULL, bh); - if (unlikely(err)) { - ext4_std_error(sb, err); - goto exit_bh; - } - brelse(bh); - /* Mark unused entries in inode bitmap used */ - ext4_debug("clear inode bitmap %#04llx (+%llu)\n", - input->inode_bitmap, input->inode_bitmap - start); - if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { - err = PTR_ERR(bh); - 
goto exit_journal; - } - - ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, - bh->b_data); - err = ext4_handle_dirty_metadata(handle, NULL, bh); - if (unlikely(err)) - ext4_std_error(sb, err); -exit_bh: - brelse(bh); - -exit_journal: - if ((err2 = ext4_journal_stop(handle)) && !err) - err = err2; - - return err; -} - /* * Iterate through the groups which hold BACKUP superblock/GDT copies in an * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before @@ -1509,16 +1378,15 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, */ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) { + struct ext4_new_flex_group_data flex_gd; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; int reserved_gdb = ext4_bg_has_super(sb, input->group) ? le16_to_cpu(es->s_reserved_gdt_blocks) : 0; - struct buffer_head *primary = NULL; - struct ext4_group_desc *gdp; struct inode *inode = NULL; - handle_t *handle; int gdb_off, gdb_num; - int err, err2; + int err; + __u16 bg_flags = 0; gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); @@ -1557,172 +1425,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) } - if ((err = verify_group_input(sb, input))) - goto exit_put; - - if ((err = setup_new_group_blocks(sb, input))) - goto exit_put; - - /* - * We will always be modifying at least the superblock and a GDT - * block. If we are adding a group past the last current GDT block, - * we will also modify the inode and the dindirect block. If we - * are adding a group with superblock/GDT backups we will also - * modify each of the reserved GDT dindirect blocks. - */ - handle = ext4_journal_start_sb(sb, - ext4_bg_has_super(sb, input->group) ? - 3 + reserved_gdb : 4); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto exit_put; - } - - if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) - goto exit_journal; - - /* - * We will only either add reserved group blocks to a backup group - * or remove reserved blocks for the first group in a new group block. - * Doing both would be mean more complex code, and sane people don't - * use non-sparse filesystems anymore. This is already checked above. - */ - if (gdb_off) { - primary = sbi->s_group_desc[gdb_num]; - if ((err = ext4_journal_get_write_access(handle, primary))) - goto exit_journal; - - if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) { - err = reserve_backup_gdb(handle, inode, input->group); - if (err) - goto exit_journal; - } - } else { - /* - * Note that we can access new group descriptor block safely - * only if add_new_gdb() succeeds. - */ - err = add_new_gdb(handle, inode, input->group); - if (err) - goto exit_journal; - primary = sbi->s_group_desc[gdb_num]; - } - - /* - * OK, now we've set up the new group. Time to make it active. - * - * so we have to be safe wrt. concurrent accesses the group - * data. So we need to be careful to set all of the relevant - * group descriptor data etc. *before* we enable the group. - * - * The key field here is sbi->s_groups_count: as long as - * that retains its old value, nobody is going to access the new - * group. - * - * So first we update all the descriptor metadata for the new - * group; then we update the total disk blocks count; then we - * update the groups count to enable the group; then finally we - * update the free space counts so that the system can start - * using the new disk blocks. 
- */ - - /* Update group descriptor block for new group */ - gdp = (struct ext4_group_desc *)((char *)primary->b_data + - gdb_off * EXT4_DESC_SIZE(sb)); - - memset(gdp, 0, EXT4_DESC_SIZE(sb)); - ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ - ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ - ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ - ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count); - ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); - gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); - gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); - - /* - * We can allocate memory for mb_alloc based on the new group - * descriptor - */ - err = ext4_mb_add_groupinfo(sb, input->group, gdp); + err = verify_group_input(sb, input); if (err) - goto exit_journal; - - /* - * Make the new blocks and inodes valid next. We do this before - * increasing the group count so that once the group is enabled, - * all of its blocks and inodes are already valid. - * - * We always allocate group-by-group, then block-by-block or - * inode-by-inode within a group, so enabling these - * blocks/inodes before the group is live won't actually let us - * allocate the new space yet. - */ - ext4_blocks_count_set(es, ext4_blocks_count(es) + - input->blocks_count); - le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb)); - - /* - * We need to protect s_groups_count against other CPUs seeing - * inconsistent state in the superblock. - * - * The precise rules we use are: - * - * * Writers must perform a smp_wmb() after updating all dependent - * data and before modifying the groups count - * - * * Readers must perform an smp_rmb() after reading the groups count - * and before reading any dependent data. - * - * NB. These rules can be relaxed when checking the group count - * while freeing data, as we can only allocate from a block - * group after serialising against the group count, and we can - * only then free after serialising in turn against that - * allocation. - */ - smp_wmb(); - - /* Update the global fs size fields */ - sbi->s_groups_count++; - - err = ext4_handle_dirty_metadata(handle, NULL, primary); - if (unlikely(err)) { - ext4_std_error(sb, err); - goto exit_journal; - } - - /* Update the reserved block counts only once the new group is - * active. 
*/ - ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + - input->reserved_blocks); - - /* Update the free space counts */ - percpu_counter_add(&sbi->s_freeclusters_counter, - EXT4_B2C(sbi, input->free_blocks_count)); - percpu_counter_add(&sbi->s_freeinodes_counter, - EXT4_INODES_PER_GROUP(sb)); - - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && - sbi->s_log_groups_per_flex) { - ext4_group_t flex_group; - flex_group = ext4_flex_group(sbi, input->group); - atomic_add(EXT4_B2C(sbi, input->free_blocks_count), - &sbi->s_flex_groups[flex_group].free_clusters); - atomic_add(EXT4_INODES_PER_GROUP(sb), - &sbi->s_flex_groups[flex_group].free_inodes); - } - - ext4_handle_dirty_super(handle, sb); + goto out; -exit_journal: - if ((err2 = ext4_journal_stop(handle)) && !err) - err = err2; - if (!err && primary) { - update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block)); - update_backups(sb, primary->b_blocknr, primary->b_data, - primary->b_size); - } -exit_put: + flex_gd.count = 1; + flex_gd.groups = input; + flex_gd.bg_flags = &bg_flags; + err = ext4_flex_group_add(sb, inode, &flex_gd); +out: iput(inode); return err; } /* ext4_group_add */ -- cgit v1.2.3 From 014a1770371a028d22f364718c805f4216911ecd Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 4 Jan 2012 17:09:52 -0500 Subject: ext4: add missing ext4_resize_end on error paths Online resize ioctls 'EXT4_IOC_GROUP_EXTEND' and 'EXT4_IOC_GROUP_ADD' call ext4_resize_begin() to check permissions and to set the EXT4_RESIZING bit lock, they do their work and they must finish with ext4_resize_end() which calls clear_bit_unlock() to unlock and to avoid -EBUSY errors for the next resize operations. This patch adds the missing ext4_resize_end() calls on error paths. Patch tested. 
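In sketch form, the fixed EXT4_IOC_GROUP_EXTEND path looks roughly like this (simplified; the journal-commit handling in the actual hunk is left out, and the names follow the patch):

	err = ext4_resize_begin(sb);            /* takes the EXT4_RESIZING bit lock */
	if (err)
		return err;

	if (get_user(n_blocks_count, (__u32 __user *)arg)) {
		err = -EFAULT;
		goto group_extend_out;          /* must not return with the bit still held */
	}

	err = mnt_want_write(filp->f_path.mnt);
	if (err)
		goto group_extend_out;

	err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
	mnt_drop_write(filp->f_path.mnt);

group_extend_out:
	ext4_resize_end(sb);                    /* clear_bit_unlock(), reached on every path */
	return err;

The EXT4_IOC_GROUP_ADD path gets the same treatment via a group_add_out label.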
Cc: stable@vger.kernel.org Signed-off-by: Djalal Harouni Signed-off-by: "Theodore Ts'o" --- fs/ext4/ioctl.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c1a98804a38..b81a5f1b697 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -184,19 +184,22 @@ setversion_out: if (err) return err; - if (get_user(n_blocks_count, (__u32 __user *)arg)) - return -EFAULT; + if (get_user(n_blocks_count, (__u32 __user *)arg)) { + err = -EFAULT; + goto group_extend_out; + } if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto group_extend_out; } err = mnt_want_write(filp->f_path.mnt); if (err) - return err; + goto group_extend_out; err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); if (EXT4_SB(sb)->s_journal) { @@ -206,9 +209,10 @@ setversion_out: } if (err == 0) err = err2; + mnt_drop_write(filp->f_path.mnt); +group_extend_out: ext4_resize_end(sb); - return err; } @@ -267,19 +271,22 @@ mext_out: return err; if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, - sizeof(input))) - return -EFAULT; + sizeof(input))) { + err = -EFAULT; + goto group_add_out; + } if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto group_add_out; } err = mnt_want_write(filp->f_path.mnt); if (err) - return err; + goto group_add_out; err = ext4_group_add(sb, &input); if (EXT4_SB(sb)->s_journal) { @@ -289,9 +296,10 @@ mext_out: } if (err == 0) err = err2; + mnt_drop_write(filp->f_path.mnt); +group_add_out: ext4_resize_end(sb); - return err; } -- cgit v1.2.3 From 1d526fc91bea04ee35b7599bf8b82f86c0aaf46c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 4 Jan 2012 21:22:51 -0500 Subject: ext4: Report max_batch_time option correctly Currently the value reported for max_batch_time is really the value of min_batch_time. Reported-by: Russell Coker Signed-off-by: Ben Hutchings --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 35377d57ec4..36570b7af7c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1096,7 +1096,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) } if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { seq_printf(seq, ",max_batch_time=%u", - (unsigned) sbi->s_min_batch_time); + (unsigned) sbi->s_max_batch_time); } /* -- cgit v1.2.3 From 9b90e5e02896406a6da28a376568003d14c06770 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 4 Jan 2012 22:01:53 -0500 Subject: ext4: reserve new feature flag codepoints Reserve the ext4 features flags EXT4_FEATURE_RO_COMPAT_METADATA_CSUM, EXT4_FEATURE_INCOMPAT_INLINEDATA, and EXT4_FEATURE_INCOMPAT_LARGEDIR. 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4bc0e82a905..13d15149c85 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1407,6 +1407,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 +#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 @@ -1419,6 +1420,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ +#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */ +#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ -- cgit v1.2.3 From 9837d8e982b7e87a7207f90618e45d460e196e6a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 4 Jan 2012 22:03:11 -0500 Subject: jbd2: fix hung processes in jbd2_journal_lock_updates() Toshiyuki Okajima found out that when running for ((i=0; i < 100000; i++)); do if ((i%2 == 0)); then chattr +j /mnt/file else chattr -j /mnt/file fi echo "0" >> /mnt/file done process sometimes hangs indefinitely in jbd2_journal_lock_updates(). Toshiyuki identified that the following race happens: jbd2_journal_lock_updates() |jbd2_journal_stop() ---------------------------------------+--------------------------------------- write_lock(&journal->j_state_lock) | . ++journal->j_barrier_count | . spin_lock(&tran->t_handle_lock) | . atomic_read(&tran->t_updates) //not 0 | | atomic_dec_and_test(&tran->t_updates) | // t_updates = 0 | wake_up(&journal->j_wait_updates) prepare_to_wait() | // no process is woken up. spin_unlock(&tran->t_handle_lock) | write_unlock(&journal->j_state_lock) | schedule() // never return | We fix the problem by first calling prepare_to_wait() and only after that checking t_updates in jbd2_journal_lock_updates(). Reported-and-analyzed-by: Toshiyuki Okajima Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/transaction.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index a0e41a4c080..35ae096bed5 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -517,12 +517,13 @@ void jbd2_journal_lock_updates(journal_t *journal) break; spin_lock(&transaction->t_handle_lock); + prepare_to_wait(&journal->j_wait_updates, &wait, + TASK_UNINTERRUPTIBLE); if (!atomic_read(&transaction->t_updates)) { spin_unlock(&transaction->t_handle_lock); + finish_wait(&journal->j_wait_updates, &wait); break; } - prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); spin_unlock(&transaction->t_handle_lock); write_unlock(&journal->j_state_lock); schedule(); -- cgit v1.2.3 From 176576dbc8141528557eeeb007af2d5a2a4891ef Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 4 Jan 2012 22:32:12 -0500 Subject: ext4: make local symbol ext4_initxattrs static The ext4_initxattrs symbol is used only in this file, so it should be declared static. 
Signed-off-by: Djalal Harouni Signed-off-by: "Theodore Ts'o" --- fs/ext4/xattr_security.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 34e4350dd4d..e247f8bca8e 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -48,8 +48,9 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name, name, value, size, flags); } -int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, - void *fs_info) +static int +ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *fs_info) { const struct xattr *xattr; handle_t *handle = fs_info; -- cgit v1.2.3 From 5f163cc759a9fa8844a4efcf1f579dc5b2ca2491 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 4 Jan 2012 22:33:28 -0500 Subject: ext4: make more symbols static A couple more functions can reasonably be made static if desired. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 4 +++- fs/ext4/ext4.h | 5 ----- fs/ext4/inode.c | 5 ++++- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 12ccacda44e..f9e2cd8cf71 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -23,6 +23,8 @@ #include +static unsigned ext4_num_base_meta_clusters(struct super_block *sb, + ext4_group_t block_group); /* * balloc.c contains the blocks allocation and deallocation routines */ @@ -668,7 +670,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) * This function returns the number of file system metadata clusters at * the beginning of a block group, including the reserved gdt blocks. */ -unsigned ext4_num_base_meta_clusters(struct super_block *sb, +static unsigned ext4_num_base_meta_clusters(struct super_block *sb, ext4_group_t block_group) { struct ext4_sb_info *sbi = EXT4_SB(sb); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 13d15149c85..e7dc9ad7394 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1803,8 +1803,6 @@ extern void ext4_init_block_bitmap(struct super_block *sb, extern unsigned ext4_free_clusters_after_init(struct super_block *sb, ext4_group_t block_group, struct ext4_group_desc *gdp); -extern unsigned ext4_num_base_meta_clusters(struct super_block *sb, - ext4_group_t block_group); extern unsigned ext4_num_overhead_clusters(struct super_block *sb, ext4_group_t block_group, struct ext4_group_desc *gdp); @@ -1896,9 +1894,6 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_discard_partial_page_buffers(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length, int flags); -extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, - struct inode *inode, struct page *page, loff_t from, - loff_t length, int flags); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e6cc24dfa98..a526684cbe3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -72,6 +72,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); static int __ext4_journalled_writepage(struct page *page, unsigned int len); static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); +static int 
ext4_discard_partial_page_buffers_no_lock(handle_t *handle, + struct inode *inode, struct page *page, loff_t from, + loff_t length, int flags); /* * Test whether an inode is a fast symlink. @@ -3161,7 +3164,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle, * * Returns zero on sucess or negative on failure. */ -int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, +static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, struct inode *inode, struct page *page, loff_t from, loff_t length, int flags) { -- cgit v1.2.3 From 4692cf58aa7b81f721c1653d48db99ea41421d58 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Fri, 2 Dec 2011 14:56:41 +0100 Subject: Btrfs: new backref walking code The old backref iteration code could only safely be used on commit roots. Besides this limitation, it had bugs in finding the roots for these references. This commit replaces large parts of it by btrfs_find_all_roots() which a) really finds all roots and the correct roots, b) works correctly under heavy file system load, c) considers delayed refs. Signed-off-by: Jan Schmidt --- fs/btrfs/backref.c | 354 +++++++++++++++-------------------------------------- fs/btrfs/ioctl.c | 8 +- fs/btrfs/scrub.c | 7 +- 3 files changed, 107 insertions(+), 262 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 03c30a1836f..b9a843226de 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -23,18 +23,6 @@ #include "transaction.h" #include "delayed-ref.h" -struct __data_ref { - struct list_head list; - u64 inum; - u64 root; - u64 extent_data_item_offset; -}; - -struct __shared_ref { - struct list_head list; - u64 disk_byte; -}; - /* * this structure records all encountered refs on the way up to the root */ @@ -964,8 +952,11 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); if (found_key->type != BTRFS_EXTENT_ITEM_KEY || found_key->objectid > logical || - found_key->objectid + found_key->offset <= logical) + found_key->objectid + found_key->offset <= logical) { + pr_debug("logical %llu is not within any extent\n", + (unsigned long long)logical); return -ENOENT; + } eb = path->nodes[0]; item_size = btrfs_item_size_nr(eb, path->slots[0]); @@ -974,6 +965,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); flags = btrfs_extent_flags(eb, ei); + pr_debug("logical %llu is at position %llu within the extent (%llu " + "EXTENT_ITEM %llu) flags %#llx size %u\n", + (unsigned long long)logical, + (unsigned long long)(logical - found_key->objectid), + (unsigned long long)found_key->objectid, + (unsigned long long)found_key->offset, + (unsigned long long)flags, item_size); if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) return BTRFS_EXTENT_FLAG_TREE_BLOCK; if (flags & BTRFS_EXTENT_FLAG_DATA) @@ -1070,128 +1068,11 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, return 0; } -static int __data_list_add(struct list_head *head, u64 inum, - u64 extent_data_item_offset, u64 root) -{ - struct __data_ref *ref; - - ref = kmalloc(sizeof(*ref), GFP_NOFS); - if (!ref) - return -ENOMEM; - - ref->inum = inum; - ref->extent_data_item_offset = extent_data_item_offset; - ref->root = root; - list_add_tail(&ref->list, head); - - return 0; -} - -static int __data_list_add_eb(struct list_head *head, struct extent_buffer *eb, - struct btrfs_extent_data_ref *dref) -{ - return __data_list_add(head, 
btrfs_extent_data_ref_objectid(eb, dref), - btrfs_extent_data_ref_offset(eb, dref), - btrfs_extent_data_ref_root(eb, dref)); -} - -static int __shared_list_add(struct list_head *head, u64 disk_byte) -{ - struct __shared_ref *ref; - - ref = kmalloc(sizeof(*ref), GFP_NOFS); - if (!ref) - return -ENOMEM; - - ref->disk_byte = disk_byte; - list_add_tail(&ref->list, head); - - return 0; -} - -static int __iter_shared_inline_ref_inodes(struct btrfs_fs_info *fs_info, - u64 logical, u64 inum, - u64 extent_data_item_offset, - u64 extent_offset, - struct btrfs_path *path, - struct list_head *data_refs, - iterate_extent_inodes_t *iterate, - void *ctx) -{ - u64 ref_root; - u32 item_size; - struct btrfs_key key; - struct extent_buffer *eb; - struct btrfs_extent_item *ei; - struct btrfs_extent_inline_ref *eiref; - struct __data_ref *ref; - int ret; - int type; - int last; - unsigned long ptr = 0; - - WARN_ON(!list_empty(data_refs)); - ret = extent_from_logical(fs_info, logical, path, &key); - if (ret & BTRFS_EXTENT_FLAG_DATA) - ret = -EIO; - if (ret < 0) - goto out; - - eb = path->nodes[0]; - ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); - item_size = btrfs_item_size_nr(eb, path->slots[0]); - - ret = 0; - ref_root = 0; - /* - * as done in iterate_extent_inodes, we first build a list of refs to - * iterate, then free the path and then iterate them to avoid deadlocks. - */ - do { - last = __get_extent_inline_ref(&ptr, eb, ei, item_size, - &eiref, &type); - if (last < 0) { - ret = last; - goto out; - } - if (type == BTRFS_TREE_BLOCK_REF_KEY || - type == BTRFS_SHARED_BLOCK_REF_KEY) { - ref_root = btrfs_extent_inline_ref_offset(eb, eiref); - ret = __data_list_add(data_refs, inum, - extent_data_item_offset, - ref_root); - } - } while (!ret && !last); - - btrfs_release_path(path); - - if (ref_root == 0) { - printk(KERN_ERR "btrfs: failed to find tree block ref " - "for shared data backref %llu\n", logical); - WARN_ON(1); - ret = -EIO; - } - -out: - while (!list_empty(data_refs)) { - ref = list_first_entry(data_refs, struct __data_ref, list); - list_del(&ref->list); - if (!ret) - ret = iterate(ref->inum, extent_offset + - ref->extent_data_item_offset, - ref->root, ctx); - kfree(ref); - } - - return ret; -} - -static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info, - u64 logical, u64 orig_extent_item_objectid, - u64 extent_offset, struct btrfs_path *path, - struct list_head *data_refs, - iterate_extent_inodes_t *iterate, - void *ctx) +static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, u64 logical, + u64 orig_extent_item_objectid, + u64 extent_item_pos, u64 root, + iterate_extent_inodes_t *iterate, void *ctx) { u64 disk_byte; struct btrfs_key key; @@ -1199,8 +1080,10 @@ static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info, struct extent_buffer *eb; int slot; int nritems; - int ret; - int found = 0; + int ret = 0; + int extent_type; + u64 data_offset; + u64 data_len; eb = read_tree_block(fs_info->tree_root, logical, fs_info->tree_root->leafsize, 0); @@ -1218,149 +1101,99 @@ static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info, if (key.type != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); - if (!fi) { - free_extent_buffer(eb); - return -EIO; - } + extent_type = btrfs_file_extent_type(eb, fi); + if (extent_type == BTRFS_FILE_EXTENT_INLINE) + continue; + /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); - if 
(disk_byte != orig_extent_item_objectid) { - if (found) - break; - else - continue; - } - ++found; - ret = __iter_shared_inline_ref_inodes(fs_info, logical, - key.objectid, - key.offset, - extent_offset, path, - data_refs, - iterate, ctx); - if (ret) - break; - } + if (disk_byte != orig_extent_item_objectid) + continue; - if (!found) { - printk(KERN_ERR "btrfs: failed to follow shared data backref " - "to parent %llu\n", logical); - WARN_ON(1); - ret = -EIO; + data_offset = btrfs_file_extent_offset(eb, fi); + data_len = btrfs_file_extent_num_bytes(eb, fi); + + if (extent_item_pos < data_offset || + extent_item_pos >= data_offset + data_len) + continue; + + pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " + "root %llu\n", orig_extent_item_objectid, + key.objectid, key.offset, root); + ret = iterate(key.objectid, + key.offset + (extent_item_pos - data_offset), + root, ctx); + if (ret) { + pr_debug("stopping iteration because ret=%d\n", ret); + break; + } } free_extent_buffer(eb); + return ret; } /* * calls iterate() for every inode that references the extent identified by - * the given parameters. will use the path given as a parameter and return it - * released. + * the given parameters. * when the iterator function returns a non-zero value, iteration stops. + * path is guaranteed to be in released state when iterate() is called. */ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, struct btrfs_path *path, - u64 extent_item_objectid, - u64 extent_offset, + u64 extent_item_objectid, u64 extent_item_pos, iterate_extent_inodes_t *iterate, void *ctx) { - unsigned long ptr = 0; - int last; int ret; - int type; - u64 logical; - u32 item_size; - struct btrfs_extent_inline_ref *eiref; - struct btrfs_extent_data_ref *dref; - struct extent_buffer *eb; - struct btrfs_extent_item *ei; - struct btrfs_key key; struct list_head data_refs = LIST_HEAD_INIT(data_refs); struct list_head shared_refs = LIST_HEAD_INIT(shared_refs); - struct __data_ref *ref_d; - struct __shared_ref *ref_s; + struct btrfs_trans_handle *trans; + struct ulist *refs; + struct ulist *roots; + struct ulist_node *ref_node = NULL; + struct ulist_node *root_node = NULL; + struct seq_list seq_elem; + struct btrfs_delayed_ref_root *delayed_refs; - eb = path->nodes[0]; - ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); - item_size = btrfs_item_size_nr(eb, path->slots[0]); - - /* first we iterate the inline refs, ... */ - do { - last = __get_extent_inline_ref(&ptr, eb, ei, item_size, - &eiref, &type); - if (last == -ENOENT) { - ret = 0; - break; - } - if (last < 0) { - ret = last; - break; - } + trans = btrfs_join_transaction(fs_info->extent_root); + if (IS_ERR(trans)) + return PTR_ERR(trans); - if (type == BTRFS_EXTENT_DATA_REF_KEY) { - dref = (struct btrfs_extent_data_ref *)(&eiref->offset); - ret = __data_list_add_eb(&data_refs, eb, dref); - } else if (type == BTRFS_SHARED_DATA_REF_KEY) { - logical = btrfs_extent_inline_ref_offset(eb, eiref); - ret = __shared_list_add(&shared_refs, logical); - } - } while (!ret && !last); + pr_debug("resolving all inodes for extent %llu\n", + extent_item_objectid); - /* ... then we proceed to in-tree references and ... 
*/ - while (!ret) { - ++path->slots[0]; - if (path->slots[0] > btrfs_header_nritems(eb)) { - ret = btrfs_next_leaf(fs_info->extent_root, path); - if (ret) { - if (ret == 1) - ret = 0; /* we're done */ - break; - } - eb = path->nodes[0]; - } - btrfs_item_key_to_cpu(eb, &key, path->slots[0]); - if (key.objectid != extent_item_objectid) - break; - if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { - dref = btrfs_item_ptr(eb, path->slots[0], - struct btrfs_extent_data_ref); - ret = __data_list_add_eb(&data_refs, eb, dref); - } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { - ret = __shared_list_add(&shared_refs, key.offset); - } - } + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + btrfs_get_delayed_seq(delayed_refs, &seq_elem); + spin_unlock(&delayed_refs->lock); - btrfs_release_path(path); + ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, + extent_item_pos, seq_elem.seq, + &refs); - /* - * ... only at the very end we can process the refs we found. this is - * because the iterator function we call is allowed to make tree lookups - * and we have to avoid deadlocks. additionally, we need more tree - * lookups ourselves for shared data refs. - */ - while (!list_empty(&data_refs)) { - ref_d = list_first_entry(&data_refs, struct __data_ref, list); - list_del(&ref_d->list); - if (!ret) - ret = iterate(ref_d->inum, extent_offset + - ref_d->extent_data_item_offset, - ref_d->root, ctx); - kfree(ref_d); - } + if (ret) + goto out; - while (!list_empty(&shared_refs)) { - ref_s = list_first_entry(&shared_refs, struct __shared_ref, - list); - list_del(&ref_s->list); - if (!ret) - ret = __iter_shared_inline_ref(fs_info, - ref_s->disk_byte, - extent_item_objectid, - extent_offset, path, - &data_refs, - iterate, ctx); - kfree(ref_s); + while (!ret && (ref_node = ulist_next(refs, ref_node))) { + ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, -1, + seq_elem.seq, &roots); + if (ret) + break; + while (!ret && (root_node = ulist_next(roots, root_node))) { + pr_debug("root %llu references leaf %llu\n", + root_node->val, ref_node->val); + ret = iterate_leaf_refs(fs_info, path, ref_node->val, + extent_item_objectid, + extent_item_pos, root_node->val, + iterate, ctx); + } } + ulist_free(refs); + ulist_free(roots); +out: + btrfs_put_delayed_seq(delayed_refs, &seq_elem); + btrfs_end_transaction(trans, fs_info->extent_root); return ret; } @@ -1369,19 +1202,20 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, iterate_extent_inodes_t *iterate, void *ctx) { int ret; - u64 offset; + u64 extent_item_pos; struct btrfs_key found_key; ret = extent_from_logical(fs_info, logical, path, &found_key); + btrfs_release_path(path); if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) ret = -EINVAL; if (ret < 0) return ret; - offset = logical - found_key.objectid; + extent_item_pos = logical - found_key.objectid; ret = iterate_extent_inodes(fs_info, path, found_key.objectid, - offset, iterate, ctx); + extent_item_pos, iterate, ctx); return ret; } @@ -1426,6 +1260,10 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) { name_len = btrfs_inode_ref_name_len(eb, iref); /* path must be released before calling iterate()! 
*/ + pr_debug("following ref at offset %u for inode %llu in " + "tree %llu\n", cur, + (unsigned long long)found_key.objectid, + (unsigned long long)fs_root->objectid); ret = iterate(parent, iref, eb, ctx); if (ret) { free_extent_buffer(eb); @@ -1466,10 +1304,14 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, return PTR_ERR(fspath); if (fspath > fspath_min) { + pr_debug("path resolved: %s\n", fspath); ipath->fspath->val[i] = (u64)(unsigned long)fspath; ++ipath->fspath->elem_cnt; ipath->fspath->bytes_left = fspath - fspath_min; } else { + pr_debug("missed path, not enough space. missing bytes: %lu, " + "constructed so far: %s\n", + (unsigned long)(fspath_min - fspath), fspath_min); ++ipath->fspath->elem_missed; ipath->fspath->bytes_missing += fspath_min - fspath; ipath->fspath->bytes_left = 0; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c48f2e931ea..9b0526872b7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2976,7 +2976,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, { int ret = 0; int size; - u64 extent_offset; + u64 extent_item_pos; struct btrfs_ioctl_logical_ino_args *loi; struct btrfs_data_container *inodes = NULL; struct btrfs_path *path = NULL; @@ -3007,15 +3007,17 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, } ret = extent_from_logical(root->fs_info, loi->logical, path, &key); + btrfs_release_path(path); if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) ret = -ENOENT; if (ret < 0) goto out; - extent_offset = loi->logical - key.objectid; + extent_item_pos = loi->logical - key.objectid; ret = iterate_extent_inodes(root->fs_info, path, key.objectid, - extent_offset, build_ino_list, inodes); + extent_item_pos, build_ino_list, + inodes); if (ret < 0) goto out; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index c27bcb67f33..b5edff25a53 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -309,7 +309,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio, u8 ref_level; unsigned long ptr = 0; const int bufsize = 4096; - u64 extent_offset; + u64 extent_item_pos; path = btrfs_alloc_path(); @@ -329,12 +329,13 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio, if (ret < 0) goto out; - extent_offset = swarn.logical - found_key.objectid; + extent_item_pos = swarn.logical - found_key.objectid; swarn.extent_item_size = found_key.offset; eb = path->nodes[0]; ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); item_size = btrfs_item_size_nr(eb, path->slots[0]); + btrfs_release_path(path); if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { do { @@ -351,7 +352,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio, } else { swarn.path = path; iterate_extent_inodes(fs_info, path, found_key.objectid, - extent_offset, + extent_item_pos, scrub_print_warning_inode, &swarn); } -- cgit v1.2.3 From 6bf7e080d5bcb0d399ee38ce3dabbfad64448192 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 1 Dec 2011 14:35:19 +0100 Subject: Btrfs: make sure we're not using obsolete code in btrfs_get_extent There's code in btrfs_get_extent that should never be used. This patch turns a WARN_ON(1) into a BUG(), hoping we can remove the transaction code from btrfs_get_extent soon. 
Signed-off-by: Jan Schmidt --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ea819386b86..603d740f0f1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5022,7 +5022,7 @@ again: } flush_dcache_page(page); } else if (create && PageUptodate(page)) { - WARN_ON(1); + BUG(); if (!trans) { kunmap(page); free_extent_map(em); -- cgit v1.2.3 From 68c97153fb7f2877f98aa6c29546381d9cad2fed Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2012 13:22:46 -0500 Subject: SUNRPC: Clean up the RPCSEC_GSS service ticket requests Instead of hacking specific service names into gss_encode_v1_msg, we should just allow the caller to specify the service name explicitly. Signed-off-by: Trond Myklebust Acked-by: J. Bruce Fields --- fs/nfs/client.c | 2 +- fs/nfsd/nfs4callback.c | 2 +- include/linux/sunrpc/auth.h | 3 ++- include/linux/sunrpc/auth_gss.h | 2 +- net/sunrpc/auth_generic.c | 6 ++++-- net/sunrpc/auth_gss/auth_gss.c | 40 +++++++++++++++++++++++----------------- 6 files changed, 32 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 873bf00d51a..32ea37198e9 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -185,7 +185,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_minorversion = cl_init->minorversion; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; #endif - cred = rpc_lookup_machine_cred(); + cred = rpc_lookup_machine_cred("*"); if (!IS_ERR(cred)) clp->cl_machine_cred = cred; nfs_fscache_get_client_cookie(clp); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7748d6a18d9..6f3ebb48b12 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -718,7 +718,7 @@ int set_callback_cred(void) { if (callback_cred) return 0; - callback_cred = rpc_lookup_machine_cred(); + callback_cred = rpc_lookup_machine_cred("nfs"); if (!callback_cred) return -ENOMEM; return 0; diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index febc4dbec2c..7874a8a5663 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -26,6 +26,7 @@ struct auth_cred { uid_t uid; gid_t gid; struct group_info *group_info; + const char *principal; unsigned char machine_cred : 1; }; @@ -127,7 +128,7 @@ void rpc_destroy_generic_auth(void); void rpc_destroy_authunix(void); struct rpc_cred * rpc_lookup_cred(void); -struct rpc_cred * rpc_lookup_machine_cred(void); +struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 8eee9dbbfe7..f1cfd4c85cd 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -82,8 +82,8 @@ struct gss_cred { enum rpc_gss_svc gc_service; struct gss_cl_ctx __rcu *gc_ctx; struct gss_upcall_msg *gc_upcall; + const char *gc_principal; unsigned long gc_upcall_timestamp; - unsigned char gc_machine_cred : 1; }; #endif /* __KERNEL__ */ diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index e010a015d99..1426ec3d0a5 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -41,15 +41,17 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred); /* * Public call interface for looking up machine creds. 
*/ -struct rpc_cred *rpc_lookup_machine_cred(void) +struct rpc_cred *rpc_lookup_machine_cred(const char *service_name) { struct auth_cred acred = { .uid = RPC_MACHINE_CRED_USERID, .gid = RPC_MACHINE_CRED_GROUPID, + .principal = service_name, .machine_cred = 1, }; - dprintk("RPC: looking up machine cred\n"); + dprintk("RPC: looking up machine cred for service %s\n", + service_name); return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0); } EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index afb56553dfe..28d72d29873 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -392,7 +392,8 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) } static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, - struct rpc_clnt *clnt, int machine_cred) + struct rpc_clnt *clnt, + const char *service_name) { struct gss_api_mech *mech = gss_msg->auth->mech; char *p = gss_msg->databuf; @@ -407,12 +408,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, p += len; gss_msg->msg.len += len; } - if (machine_cred) { - len = sprintf(p, "service=* "); - p += len; - gss_msg->msg.len += len; - } else if (!strcmp(clnt->cl_program->name, "nfs4_cb")) { - len = sprintf(p, "service=nfs "); + if (service_name != NULL) { + len = sprintf(p, "service=%s ", service_name); p += len; gss_msg->msg.len += len; } @@ -429,17 +426,18 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, } static void gss_encode_msg(struct gss_upcall_msg *gss_msg, - struct rpc_clnt *clnt, int machine_cred) + struct rpc_clnt *clnt, + const char *service_name) { if (pipe_version == 0) gss_encode_v0_msg(gss_msg); else /* pipe_version == 1 */ - gss_encode_v1_msg(gss_msg, clnt, machine_cred); + gss_encode_v1_msg(gss_msg, clnt, service_name); } -static inline struct gss_upcall_msg * -gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid, struct rpc_clnt *clnt, - int machine_cred) +static struct gss_upcall_msg * +gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, + uid_t uid, const char *service_name) { struct gss_upcall_msg *gss_msg; int vers; @@ -459,7 +457,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid, struct rpc_clnt *clnt, atomic_set(&gss_msg->count, 1); gss_msg->uid = uid; gss_msg->auth = gss_auth; - gss_encode_msg(gss_msg, clnt, machine_cred); + gss_encode_msg(gss_msg, clnt, service_name); return gss_msg; } @@ -471,7 +469,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr struct gss_upcall_msg *gss_new, *gss_msg; uid_t uid = cred->cr_uid; - gss_new = gss_alloc_msg(gss_auth, uid, clnt, gss_cred->gc_machine_cred); + gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal); if (IS_ERR(gss_new)) return gss_new; gss_msg = gss_add_msg(gss_new); @@ -995,7 +993,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) */ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; - cred->gc_machine_cred = acred->machine_cred; + cred->gc_principal = NULL; + if (acred->machine_cred) + cred->gc_principal = acred->principal; kref_get(&gss_auth->kref); return &cred->gc_base; @@ -1030,7 +1030,12 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags)) return 0; out: - if (acred->machine_cred != gss_cred->gc_machine_cred) + if (acred->principal != NULL) { + if (gss_cred->gc_principal == NULL) + return 0; + return 
strcmp(acred->principal, gss_cred->gc_principal) == 0; + } + if (gss_cred->gc_principal != NULL) return 0; return rc->cr_uid == acred->uid; } @@ -1104,7 +1109,8 @@ static int gss_renew_cred(struct rpc_task *task) struct rpc_auth *auth = oldcred->cr_auth; struct auth_cred acred = { .uid = oldcred->cr_uid, - .machine_cred = gss_cred->gc_machine_cred, + .principal = gss_cred->gc_principal, + .machine_cred = (gss_cred->gc_principal != NULL ? 1 : 0), }; struct rpc_cred *new; -- cgit v1.2.3 From 2edb6bc3852c681c0d948245bd55108dc6407604 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 16 Nov 2011 11:46:31 +1100 Subject: NFS - fix recent breakage to NFS error handling. From c6d615d2b97fe305cbf123a8751ced859dca1d5e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 16 Nov 2011 09:39:05 +1100 Subject: [PATCH] NFS - fix recent breakage to NFS error handling. commit 02c24a82187d5a628c68edfe71ae60dc135cd178 made a small and presumably unintended change to write error handling in NFS. Previously an error from filemap_write_and_wait_range would only be of interest if nfs_file_fsync did not return an error. After this commit, an error from filemap_write_and_wait_range would mean that (the rest of) nfs_file_fsync would not even be called. This means that: 1/ you are more likely to see EIO than e.g. EDQUOT or ENOSPC. 2/ NFS_CONTEXT_ERROR_WRITE remains set for longer so more writes are synchronous. This patch restores previous behaviour. Cc: stable@kernel.org Cc: Josef Bacik Cc: Jan Kara Cc: Al Viro Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 606ef0f20ae..c43a452f7da 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -272,13 +272,13 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) datasync); ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret) - return ret; mutex_lock(&inode->i_mutex); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); status = nfs_commit_inode(inode, FLUSH_SYNC); + if (status >= 0 && ret < 0) + status = ret; have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); if (have_error) ret = xchg(&ctx->error, 0); -- cgit v1.2.3 From 8a0d551a59ac92d8ff048d6cb29d3a02073e81e8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 20 Dec 2011 06:57:45 -0500 Subject: nfs: fix regression in handling of context= option in NFSv4 Setting the security context of a NFSv4 mount via the context= mount option is currently broken. The NFSv4 codepath allocates a parsed options struct, and then parses the mount options to fill it. It eventually calls nfs4_remote_mount which calls security_init_mnt_opts. That clobbers the lsm_opts struct that was populated earlier. This bug also looks like it causes a small memory leak on each v4 mount where context= is used. Fix this by moving the initialization of the lsm_opts into nfs_alloc_parsed_mount_data. Also, add a destructor for nfs_parsed_mount_data to make it easier to free all of the allocations hanging off of it, and to ensure that the security_free_mnt_opts is called whenever security_init_mnt_opts is. I believe this regression was introduced quite some time ago, probably by commit c02d7adf. 
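The resulting nfs_fs_mount() flow, in condensed and illustrative form (function names follow the patch; only the allocation and teardown shape is shown):

	data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); /* lsm_opts initialized here */
	mntfh = nfs_alloc_fhandle();
	if (data == NULL || mntfh == NULL)
		goto out;                       /* the destructor tolerates NULL */

	error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
	/* parsing may attach a context= label to data->lsm_opts; nothing
	 * after this point calls security_init_mnt_opts() on it again */

	/* ... superblock and root dentry setup elided ... */
out:
	nfs_free_parsed_mount_data(data);       /* the one place security_free_mnt_opts() runs */
	nfs_free_fhandle(mntfh);
	return mntroot;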
Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 134777406ee..3ada13c9986 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -909,10 +909,24 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve data->auth_flavor_len = 1; data->version = version; data->minorversion = 0; + security_init_mnt_opts(&data->lsm_opts); } return data; } +static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data) +{ + if (data) { + kfree(data->client_address); + kfree(data->mount_server.hostname); + kfree(data->nfs_server.export_path); + kfree(data->nfs_server.hostname); + kfree(data->fscache_uniq); + security_free_mnt_opts(&data->lsm_opts); + kfree(data); + } +} + /* * Sanity-check a server address provided by the mount command. * @@ -2220,9 +2234,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); mntfh = nfs_alloc_fhandle(); if (data == NULL || mntfh == NULL) - goto out_free_fh; - - security_init_mnt_opts(&data->lsm_opts); + goto out; /* Validate the mount data */ error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); @@ -2234,8 +2246,6 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, #ifdef CONFIG_NFS_V4 if (data->version == 4) { mntroot = nfs4_try_mount(flags, dev_name, data); - kfree(data->client_address); - kfree(data->nfs_server.export_path); goto out; } #endif /* CONFIG_NFS_V4 */ @@ -2290,13 +2300,8 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, s->s_flags |= MS_ACTIVE; out: - kfree(data->nfs_server.hostname); - kfree(data->mount_server.hostname); - kfree(data->fscache_uniq); - security_free_mnt_opts(&data->lsm_opts); -out_free_fh: + nfs_free_parsed_mount_data(data); nfs_free_fhandle(mntfh); - kfree(data); return mntroot; out_err_nosb: @@ -2623,9 +2628,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, mntfh = nfs_alloc_fhandle(); if (data == NULL || mntfh == NULL) - goto out_free_fh; - - security_init_mnt_opts(&data->lsm_opts); + goto out; /* Get a volume representation */ server = nfs4_create_server(data, mntfh); @@ -2677,13 +2680,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; - security_free_mnt_opts(&data->lsm_opts); nfs_free_fhandle(mntfh); return mntroot; out: - security_free_mnt_opts(&data->lsm_opts); -out_free_fh: nfs_free_fhandle(mntfh); return ERR_PTR(error); @@ -2838,7 +2838,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type, data = nfs_alloc_parsed_mount_data(4); if (data == NULL) - goto out_free_data; + goto out; /* Validate the mount data */ error = nfs4_validate_mount_data(raw_data, data, dev_name); @@ -2852,12 +2852,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type, error = PTR_ERR(res); out: - kfree(data->client_address); - kfree(data->nfs_server.export_path); - kfree(data->nfs_server.hostname); - kfree(data->fscache_uniq); -out_free_data: - kfree(data); + nfs_free_parsed_mount_data(data); dprintk("<-- nfs4_mount() = %d%s\n", error, error != 0 ? 
" [error]" : ""); return res; -- cgit v1.2.3 From 61f2e5106582d02f30b6807e3f9c07463c572ccb Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 9 Nov 2011 13:58:20 -0500 Subject: NFSv4.1: fix backchannel slotid off-by-one bug Cc:stable@kernel.org Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 43926add945..54cea8ad5a7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -339,7 +339,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) dprintk("%s enter. slotid %d seqid %d\n", __func__, args->csa_slotid, args->csa_sequenceid); - if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS) + if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS) return htonl(NFS4ERR_BADSLOT); slot = tbl->slots + args->csa_slotid; -- cgit v1.2.3 From aacd5537270a752fe12a9914a207284fc2341c6d Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 9 Nov 2011 13:58:21 -0500 Subject: NFSv4.1: cleanup init and reset of session slot tables We are either initializing or resetting a session. Initialize or reset the session slot tables accordingly. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 59 +++++++++++++++++++++---------------------------------- 1 file changed, 22 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d9f4d78c341..a64aa56e140 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5021,23 +5021,6 @@ out: return ret; } -/* - * Reset the forechannel and backchannel slot tables - */ -static int nfs4_reset_slot_tables(struct nfs4_session *session) -{ - int status; - - status = nfs4_reset_slot_table(&session->fc_slot_table, - session->fc_attrs.max_reqs, 1); - if (status) - return status; - - status = nfs4_reset_slot_table(&session->bc_slot_table, - session->bc_attrs.max_reqs, 0); - return status; -} - /* Destroy the slot table */ static void nfs4_destroy_slot_tables(struct nfs4_session *session) { @@ -5083,29 +5066,35 @@ out: } /* - * Initialize the forechannel and backchannel tables + * Initialize or reset the forechannel and backchannel tables */ -static int nfs4_init_slot_tables(struct nfs4_session *session) +static int nfs4_setup_session_slot_tables(struct nfs4_session *ses) { struct nfs4_slot_table *tbl; - int status = 0; + int status; - tbl = &session->fc_slot_table; + dprintk("--> %s\n", __func__); + /* Fore channel */ + tbl = &ses->fc_slot_table; if (tbl->slots == NULL) { - status = nfs4_init_slot_table(tbl, - session->fc_attrs.max_reqs, 1); + status = nfs4_init_slot_table(tbl, ses->fc_attrs.max_reqs, 1); + if (status) /* -ENOMEM */ + return status; + } else { + status = nfs4_reset_slot_table(tbl, ses->fc_attrs.max_reqs, 1); if (status) return status; } - - tbl = &session->bc_slot_table; + /* Back channel */ + tbl = &ses->bc_slot_table; if (tbl->slots == NULL) { - status = nfs4_init_slot_table(tbl, - session->bc_attrs.max_reqs, 0); + status = nfs4_init_slot_table(tbl, ses->bc_attrs.max_reqs, 0); if (status) - nfs4_destroy_slot_tables(session); - } - + /* Fore and back channel share a connection so get + * both slot tables or neither */ + nfs4_destroy_slot_tables(ses); + } else + status = nfs4_reset_slot_table(tbl, ses->bc_attrs.max_reqs, 0); return status; } @@ -5293,13 +5282,9 @@ int nfs4_proc_create_session(struct nfs_client *clp) if (status) goto out; - /* Init and reset the fore channel */ - status = 
nfs4_init_slot_tables(session); - dprintk("slot table initialization returned %d\n", status); - if (status) - goto out; - status = nfs4_reset_slot_tables(session); - dprintk("slot table reset returned %d\n", status); + /* Init or reset the session slot tables */ + status = nfs4_setup_session_slot_tables(session); + dprintk("slot table setup returned %d\n", status); if (status) goto out; -- cgit v1.2.3 From aabd0b40b327d5c6518c8c908819b9bf864ad56a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 9 Nov 2011 13:58:22 -0500 Subject: NFSv4.1: change nfs4_free_slot parameters for dynamic slots Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a64aa56e140..b2104461ed4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -363,9 +363,8 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp * Must be called while holding tbl->slot_tbl_lock */ static void -nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot) +nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) { - int free_slotid = free_slot - tbl->slots; int slotid = free_slotid; BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE); @@ -430,7 +429,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) } spin_lock(&tbl->slot_tbl_lock); - nfs4_free_slot(tbl, res->sr_slot); + nfs4_free_slot(tbl, res->sr_slot - tbl->slots); nfs4_check_drain_fc_complete(res->sr_session); spin_unlock(&tbl->slot_tbl_lock); res->sr_slot = NULL; -- cgit v1.2.3 From 0b1c8fc43c1f9fcde2d18182988f05eeaaae509b Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 9 Nov 2011 13:58:26 -0500 Subject: NFSv4.1: cleanup comment and debug printk Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2104461ed4..fcc2408d7ab 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -552,13 +552,10 @@ int nfs41_setup_sequence(struct nfs4_session *session, spin_lock(&tbl->slot_tbl_lock); if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { - /* - * The state manager will wait until the slot table is empty. - * Schedule the reset thread - */ + /* The state manager will wait until the slot table is empty */ rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); - dprintk("%s Schedule Session Reset\n", __func__); + dprintk("%s session is draining\n", __func__); return -EAGAIN; } -- cgit v1.2.3 From 3476f114addb7b96912840a234702f660a1f460b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 11 Aug 2011 13:54:28 -0700 Subject: nfs: fix a minor do_div portability issue This change modifies filelayout_get_dense_offset() to use the functions in math64.h and thus avoid a 32-bit platform compile error trying to use do_div() on an s64 type. 
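do_div(n, base) expects an unsigned 64-bit lvalue, divides it in place and evaluates to the remainder, which is why handing it a signed 64-bit offset breaks 32-bit builds; the math64.h helpers take and return values instead. A minimal sketch of the idiom the patch switches to (it mirrors the hunk below):

	#include <linux/math64.h>

	u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
	u64 stripe_no;
	u32 rem;

	offset -= flseg->pattern_offset;
	stripe_no = div_u64(offset, stripe_width);        /* 64-by-32 quotient  */
	div_u64_rem(offset, flseg->stripe_unit, &rem);    /* 64-by-32 remainder */

	return stripe_no * flseg->stripe_unit + rem;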
Signed-off-by: Chris Metcalf Reviewed-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index a62d36b9a99..71ec08617e2 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -49,13 +49,14 @@ filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg, loff_t offset) { u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count; - u64 tmp; + u64 stripe_no; + u32 rem; offset -= flseg->pattern_offset; - tmp = offset; - do_div(tmp, stripe_width); + stripe_no = div_u64(offset, stripe_width); + div_u64_rem(offset, flseg->stripe_unit, &rem); - return tmp * flseg->stripe_unit + do_div(offset, flseg->stripe_unit); + return stripe_no * flseg->stripe_unit + rem; } /* This function is used by the layout driver to calculate the -- cgit v1.2.3 From bf118a342f10dafe44b14451a1392c3254629a1f Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 7 Dec 2011 11:55:27 -0500 Subject: NFSv4: include bitmap in nfsv4 get acl data The NFSv4 bitmap size is unbounded: a server can return an arbitrary sized bitmap in an FATTR4_WORD0_ACL request. Replace using the nfs4_fattr_bitmap_maxsz as a guess to the maximum bitmask returned by a server with the inclusion of the bitmap (xdr length plus bitmasks) and the acl data xdr length to the (cached) acl page data. This is a general solution to commit e5012d1f "NFSv4.1: update nfs4_fattr_bitmap_maxsz" and fixes hitting a BUG_ON in xdr_shrink_bufhead when getting ACLs. Fix a bug in decode_getacl that returned -EINVAL on ACLs > page when getxattr was called with a NULL buffer, preventing ACL > PAGE_SIZE from being retrieved. Cc: stable@kernel.org Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 96 +++++++++++++++++++++++++++------------------- fs/nfs/nfs4xdr.c | 31 +++++++++++---- include/linux/nfs_xdr.h | 5 +++ include/linux/sunrpc/xdr.h | 2 + net/sunrpc/xdr.c | 3 +- 5 files changed, 89 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fcc2408d7ab..3b108011845 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3426,19 +3426,6 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) */ #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) -static void buf_to_pages(const void *buf, size_t buflen, - struct page **pages, unsigned int *pgbase) -{ - const void *p = buf; - - *pgbase = offset_in_page(buf); - p -= *pgbase; - while (p < buf + buflen) { - *(pages++) = virt_to_page(p); - p += PAGE_CACHE_SIZE; - } -} - static int buf_to_pages_noslab(const void *buf, size_t buflen, struct page **pages, unsigned int *pgbase) { @@ -3535,9 +3522,19 @@ out: nfs4_set_cached_acl(inode, acl); } +/* + * The getxattr API returns the required buffer length when called with a + * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating + * the required buf. On a NULL buf, we send a page of data to the server + * guessing that the ACL request can be serviced by a page. If so, we cache + * up to the page of ACL data, and the 2nd call to getxattr is serviced by + * the cache. If not so, we throw away the page, and cache the required + * length. The next getxattr call will then produce another round trip to + * the server, this time with the input buf of the required size. 
+ */ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct page *pages[NFS4ACL_MAXPAGES]; + struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; struct nfs_getaclargs args = { .fh = NFS_FH(inode), .acl_pages = pages, @@ -3552,41 +3549,60 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - struct page *localpage = NULL; - int ret; + int ret = -ENOMEM, npages, i, acl_len = 0; - if (buflen < PAGE_SIZE) { - /* As long as we're doing a round trip to the server anyway, - * let's be prepared for a page of acl data. */ - localpage = alloc_page(GFP_KERNEL); - resp_buf = page_address(localpage); - if (localpage == NULL) - return -ENOMEM; - args.acl_pages[0] = localpage; - args.acl_pgbase = 0; - args.acl_len = PAGE_SIZE; - } else { - resp_buf = buf; - buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; + /* As long as we're doing a round trip to the server anyway, + * let's be prepared for a page of acl data. */ + if (npages == 0) + npages = 1; + + for (i = 0; i < npages; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + goto out_free; + } + if (npages > 1) { + /* for decoding across pages */ + args.acl_scratch = alloc_page(GFP_KERNEL); + if (!args.acl_scratch) + goto out_free; } - ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); + args.acl_len = npages * PAGE_SIZE; + args.acl_pgbase = 0; + /* Let decode_getfacl know not to fail if the ACL data is larger than + * the page we send as a guess */ + if (buf == NULL) + res.acl_flags |= NFS4_ACL_LEN_REQUEST; + resp_buf = page_address(pages[0]); + + dprintk("%s buf %p buflen %ld npages %d args.acl_len %ld\n", + __func__, buf, buflen, npages, args.acl_len); + ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), + &msg, &args.seq_args, &res.seq_res, 0); if (ret) goto out_free; - if (res.acl_len > args.acl_len) - nfs4_write_cached_acl(inode, NULL, res.acl_len); + + acl_len = res.acl_len - res.acl_data_offset; + if (acl_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, acl_len); else - nfs4_write_cached_acl(inode, resp_buf, res.acl_len); + nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset, + acl_len); if (buf) { ret = -ERANGE; - if (res.acl_len > buflen) + if (acl_len > buflen) goto out_free; - if (localpage) - memcpy(buf, resp_buf, res.acl_len); + _copy_from_pages(buf, pages, res.acl_data_offset, + res.acl_len); } - ret = res.acl_len; + ret = acl_len; out_free: - if (localpage) - __free_page(localpage); + for (i = 0; i < npages; i++) + if (pages[i]) + __free_page(pages[i]); + if (args.acl_scratch) + __free_page(args.acl_scratch); return ret; } @@ -3617,6 +3633,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) nfs_zap_acl_cache(inode); ret = nfs4_read_cached_acl(inode, buf, buflen); if (ret != -ENOENT) + /* -ENOENT is returned if there is no ACL or if there is an ACL + * but no cached acl data, just the acl length */ return ret; return nfs4_get_acl_uncached(inode, buf, buflen); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e6161b213ed..dcaf69309d8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2517,11 +2517,13 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + 
op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1; + replen = hdr.replen + op_decode_hdr_maxsz + 1; encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, args->acl_pgbase, args->acl_len); + xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE); + encode_nops(&hdr); } @@ -4957,17 +4959,18 @@ decode_restorefh(struct xdr_stream *xdr) } static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, - size_t *acl_len) + struct nfs_getaclres *res) { - __be32 *savep; + __be32 *savep, *bm_p; uint32_t attrlen, bitmap[3] = {0}; struct kvec *iov = req->rq_rcv_buf.head; int status; - *acl_len = 0; + res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto out; + bm_p = xdr->p; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) @@ -4979,18 +4982,30 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, size_t hdrlen; u32 recvd; + /* The bitmap (xdr len + bitmaps) and the attr xdr len words + * are stored with the acl data to handle the problem of + * variable length bitmaps.*/ + xdr->p = bm_p; + res->acl_data_offset = be32_to_cpup(bm_p) + 2; + res->acl_data_offset <<= 2; + /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + attrlen += res->acl_data_offset; recvd = req->rq_rcv_buf.len - hdrlen; if (attrlen > recvd) { - dprintk("NFS: server cheating in getattr" - " acl reply: attrlen %u > recvd %u\n", + if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { + /* getxattr interface called with a NULL buf */ + res->acl_len = attrlen; + goto out; + } + dprintk("NFS: acl reply: attrlen %u > recvd %u\n", attrlen, recvd); return -EINVAL; } xdr_read_pages(xdr, attrlen); - *acl_len = attrlen; + res->acl_len = attrlen; } else status = -EOPNOTSUPP; @@ -6028,7 +6043,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_getacl(xdr, rqstp, &res->acl_len); + status = decode_getacl(xdr, rqstp, res); out: return status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2a7c533be5d..6c898afe609 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -602,11 +602,16 @@ struct nfs_getaclargs { size_t acl_len; unsigned int acl_pgbase; struct page ** acl_pages; + struct page * acl_scratch; struct nfs4_sequence_args seq_args; }; +/* getxattr ACL interface flags */ +#define NFS4_ACL_LEN_REQUEST 0x0001 /* zero length getxattr buffer */ struct nfs_getaclres { size_t acl_len; + size_t acl_data_offset; + int acl_flags; struct nfs4_sequence_res seq_res; }; diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index a20970ef9e4..af70af33354 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -191,6 +191,8 @@ extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); +extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase, + size_t len); /* * Provide some simple tools for XDR buffer overflow-checking etc. 
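For clarity, a small worked example of the acl_data_offset computation introduced in decode_getacl() above (illustrative only; bm_p is the decoder position saved just before the bitmap, as in that hunk, and the helper name below is made up for the example). The reply lays out <bitmap word count> <bitmap word(s)> <attribute length>, so the ACL bytes begin (count + 2) XDR words past bm_p:

    /* Assumes the kernel byteorder helpers (be32_to_cpup). */
    static u32 acl_data_offset_example(const __be32 *bm_p)
    {
    	/* Number of bitmap words the server returned; this can vary,
    	 * which is why the offset is computed rather than guessed. */
    	u32 bm_words = be32_to_cpup(bm_p);

    	/* count word + bitmap words + attribute length word, in bytes:
    	 * e.g. a single bitmap word gives (1 + 2) << 2 = 12 bytes. */
    	return (bm_words + 2) << 2;
    }
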
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 277ebd4bf09..593f4c60530 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -296,7 +296,7 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len) * Copies data into an arbitrary memory location from an array of pages * The copy is assumed to be non-overlapping. */ -static void +void _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) { struct page **pgfrom; @@ -324,6 +324,7 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) } while ((len -= copy) != 0); } +EXPORT_SYMBOL_GPL(_copy_from_pages); /* * xdr_shrink_bufhead -- cgit v1.2.3 From 414adf14cd3b52e411f79d941a15d0fd4af427fc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 Dec 2011 16:13:39 -0500 Subject: NFS: Clean up nfs4_find_state_owners_locked() There's no longer a need to check the so_server field in the state owner, because nowadays the RB tree we search for state owners contains owners for that only server. Make nfs4_find_state_owners_locked() use the same tree searching logic as nfs4_insert_state_owner_locked(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6a7107ae6b7..6354e4fcc82 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -377,31 +377,22 @@ nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) { struct rb_node **p = &server->state_owners.rb_node, *parent = NULL; - struct nfs4_state_owner *sp, *res = NULL; + struct nfs4_state_owner *sp; while (*p != NULL) { parent = *p; sp = rb_entry(parent, struct nfs4_state_owner, so_server_node); - if (server < sp->so_server) { - p = &parent->rb_left; - continue; - } - if (server > sp->so_server) { - p = &parent->rb_right; - continue; - } if (cred < sp->so_cred) p = &parent->rb_left; else if (cred > sp->so_cred) p = &parent->rb_right; else { atomic_inc(&sp->so_count); - res = sp; - break; + return sp; } } - return res; + return NULL; } static struct nfs4_state_owner * -- cgit v1.2.3 From a0ea787b027b79cf2e01c6758e5246db06520158 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Tue, 3 Jan 2012 15:27:50 -0600 Subject: fs/9p: check schedule_timeout_interruptible return value In v9fs_file_do_lock() we need to check return value of schedule_timeout_interruptible() and exit the loop when it returns nonzero, otherwise the loop is not really interruptible and after the signal, the loop is no longer throttled by P9_LOCK_TIMEOUT. Signed-off-by: Jim Garlick Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 62857a810a7..a7ac45d5bd2 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -204,7 +204,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) break; if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd)) break; - schedule_timeout_interruptible(P9_LOCK_TIMEOUT); + if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0) + break; } /* map 9p status to VFS status */ -- cgit v1.2.3 From 5d3851530d6d68564e4e0ce04d0547d4d106fc72 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 28 Nov 2011 10:40:46 -0800 Subject: 9p: Reduce object size with CONFIG_NET_9P_DEBUG Reduce object size by deduplicating formats. Use vsprintf extension %pV. Rename P9_DPRINTK uses to p9_debug, align arguments. 
Add function for _p9_debug and macro to add __func__. Add missing "\n"s to p9_debug uses. Remove embedded function names as p9_debug adds it. Remove P9_EPRINTK macro and convert use to pr_. Add and use pr_fmt and pr_. $ size fs/9p/built-in.o* text data bss dec hex filename 62133 984 16000 79117 1350d fs/9p/built-in.o.new 67342 984 16928 85254 14d06 fs/9p/built-in.o.old $ size net/9p/built-in.o* text data bss dec hex filename 88792 4148 22024 114964 1c114 net/9p/built-in.o.new 94072 4148 23232 121452 1da6c net/9p/built-in.o.old Signed-off-by: Joe Perches Signed-off-by: Eric Van Hensbergen --- fs/9p/cache.c | 64 ++++++------- fs/9p/fid.c | 8 +- fs/9p/v9fs.c | 59 ++++++------ fs/9p/vfs_addr.c | 13 ++- fs/9p/vfs_dentry.c | 12 +-- fs/9p/vfs_dir.c | 13 ++- fs/9p/vfs_file.c | 31 +++---- fs/9p/vfs_inode.c | 86 +++++++++--------- fs/9p/vfs_inode_dotl.c | 85 +++++++++-------- fs/9p/vfs_super.c | 12 +-- fs/9p/xattr.c | 16 ++-- include/net/9p/9p.h | 28 ++---- net/9p/client.c | 242 ++++++++++++++++++++++++------------------------- net/9p/error.c | 6 +- net/9p/mod.c | 31 ++++++- net/9p/protocol.c | 8 +- net/9p/trans_fd.c | 113 ++++++++++++----------- net/9p/trans_rdma.c | 26 +++--- net/9p/trans_virtio.c | 34 +++---- net/9p/util.c | 4 +- 20 files changed, 452 insertions(+), 439 deletions(-) (limited to 'fs') diff --git a/fs/9p/cache.c b/fs/9p/cache.c index 945aa5f02f9..a9ea73d6dcf 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -62,8 +62,8 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data, uint16_t klen = 0; v9ses = (struct v9fs_session_info *)cookie_netfs_data; - P9_DPRINTK(P9_DEBUG_FSC, "session %p buf %p size %u", v9ses, - buffer, bufmax); + p9_debug(P9_DEBUG_FSC, "session %p buf %p size %u\n", + v9ses, buffer, bufmax); if (v9ses->cachetag) klen = strlen(v9ses->cachetag); @@ -72,7 +72,7 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data, return 0; memcpy(buffer, v9ses->cachetag, klen); - P9_DPRINTK(P9_DEBUG_FSC, "cache session tag %s", v9ses->cachetag); + p9_debug(P9_DEBUG_FSC, "cache session tag %s\n", v9ses->cachetag); return klen; } @@ -91,14 +91,14 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index, &v9fs_cache_session_index_def, v9ses); - P9_DPRINTK(P9_DEBUG_FSC, "session %p get cookie %p", v9ses, - v9ses->fscache); + p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n", + v9ses, v9ses->fscache); } void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses) { - P9_DPRINTK(P9_DEBUG_FSC, "session %p put cookie %p", v9ses, - v9ses->fscache); + p9_debug(P9_DEBUG_FSC, "session %p put cookie %p\n", + v9ses, v9ses->fscache); fscache_relinquish_cookie(v9ses->fscache, 0); v9ses->fscache = NULL; } @@ -109,8 +109,8 @@ static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, { const struct v9fs_inode *v9inode = cookie_netfs_data; memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path)); - P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode, - v9inode->qid.path); + p9_debug(P9_DEBUG_FSC, "inode %p get key %llu\n", + &v9inode->vfs_inode, v9inode->qid.path); return sizeof(v9inode->qid.path); } @@ -120,8 +120,8 @@ static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, const struct v9fs_inode *v9inode = cookie_netfs_data; *size = i_size_read(&v9inode->vfs_inode); - P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode, - *size); + p9_debug(P9_DEBUG_FSC, "inode %p get attr %llu\n", + 
&v9inode->vfs_inode, *size); } static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, @@ -129,8 +129,8 @@ static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, { const struct v9fs_inode *v9inode = cookie_netfs_data; memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version)); - P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode, - v9inode->qid.version); + p9_debug(P9_DEBUG_FSC, "inode %p get aux %u\n", + &v9inode->vfs_inode, v9inode->qid.version); return sizeof(v9inode->qid.version); } @@ -206,8 +206,8 @@ void v9fs_cache_inode_get_cookie(struct inode *inode) &v9fs_cache_inode_index_def, v9inode); - P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode, - v9inode->fscache); + p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n", + inode, v9inode->fscache); } void v9fs_cache_inode_put_cookie(struct inode *inode) @@ -216,8 +216,8 @@ void v9fs_cache_inode_put_cookie(struct inode *inode) if (!v9inode->fscache) return; - P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode, - v9inode->fscache); + p9_debug(P9_DEBUG_FSC, "inode %p put cookie %p\n", + inode, v9inode->fscache); fscache_relinquish_cookie(v9inode->fscache, 0); v9inode->fscache = NULL; @@ -229,8 +229,8 @@ void v9fs_cache_inode_flush_cookie(struct inode *inode) if (!v9inode->fscache) return; - P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode, - v9inode->fscache); + p9_debug(P9_DEBUG_FSC, "inode %p flush cookie %p\n", + inode, v9inode->fscache); fscache_relinquish_cookie(v9inode->fscache, 1); v9inode->fscache = NULL; @@ -272,8 +272,8 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode) v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, &v9fs_cache_inode_index_def, v9inode); - P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p", - inode, old, v9inode->fscache); + p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n", + inode, old, v9inode->fscache); spin_unlock(&v9inode->fscache_lock); } @@ -323,7 +323,7 @@ int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) int ret; const struct v9fs_inode *v9inode = V9FS_I(inode); - P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); + p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); if (!v9inode->fscache) return -ENOBUFS; @@ -335,13 +335,13 @@ int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) switch (ret) { case -ENOBUFS: case -ENODATA: - P9_DPRINTK(P9_DEBUG_FSC, "page/inode not in cache %d", ret); + p9_debug(P9_DEBUG_FSC, "page/inode not in cache %d\n", ret); return 1; case 0: - P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted"); + p9_debug(P9_DEBUG_FSC, "BIO submitted\n"); return ret; default: - P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret); + p9_debug(P9_DEBUG_FSC, "ret %d\n", ret); return ret; } } @@ -361,7 +361,7 @@ int __v9fs_readpages_from_fscache(struct inode *inode, int ret; const struct v9fs_inode *v9inode = V9FS_I(inode); - P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages); + p9_debug(P9_DEBUG_FSC, "inode %p pages %u\n", inode, *nr_pages); if (!v9inode->fscache) return -ENOBUFS; @@ -373,15 +373,15 @@ int __v9fs_readpages_from_fscache(struct inode *inode, switch (ret) { case -ENOBUFS: case -ENODATA: - P9_DPRINTK(P9_DEBUG_FSC, "pages/inodes not in cache %d", ret); + p9_debug(P9_DEBUG_FSC, "pages/inodes not in cache %d\n", ret); return 1; case 0: BUG_ON(!list_empty(pages)); BUG_ON(*nr_pages != 0); - P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted"); + p9_debug(P9_DEBUG_FSC, "BIO submitted\n"); return ret; default: - 
P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret); + p9_debug(P9_DEBUG_FSC, "ret %d\n", ret); return ret; } } @@ -396,9 +396,9 @@ void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) int ret; const struct v9fs_inode *v9inode = V9FS_I(inode); - P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); + p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL); - P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret); + p9_debug(P9_DEBUG_FSC, "ret = %d\n", ret); if (ret != 0) v9fs_uncache_page(inode, page); } @@ -409,7 +409,7 @@ void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page) { const struct v9fs_inode *v9inode = V9FS_I(inode); - P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); + p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); if (PageFsCache(page)) fscache_wait_on_page_write(v9inode->fscache, page); } diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 85b67ffa2a4..da8eefbe830 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -45,8 +45,8 @@ int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid) { struct v9fs_dentry *dent; - P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n", - fid->fid, dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "fid %d dentry %s\n", + fid->fid, dentry->d_name.name); dent = dentry->d_fsdata; if (!dent) { @@ -79,8 +79,8 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any) struct v9fs_dentry *dent; struct p9_fid *fid, *ret; - P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", - dentry->d_name.name, dentry, uid, any); + p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", + dentry->d_name.name, dentry, uid, any); dent = (struct v9fs_dentry *) dentry->d_fsdata; ret = NULL; if (dent) { diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 2b78014a124..1964f98e74b 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -23,6 +23,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -85,15 +87,15 @@ static int get_cache_mode(char *s) if (!strcmp(s, "loose")) { version = CACHE_LOOSE; - P9_DPRINTK(P9_DEBUG_9P, "Cache mode: loose\n"); + p9_debug(P9_DEBUG_9P, "Cache mode: loose\n"); } else if (!strcmp(s, "fscache")) { version = CACHE_FSCACHE; - P9_DPRINTK(P9_DEBUG_9P, "Cache mode: fscache\n"); + p9_debug(P9_DEBUG_9P, "Cache mode: fscache\n"); } else if (!strcmp(s, "none")) { version = CACHE_NONE; - P9_DPRINTK(P9_DEBUG_9P, "Cache mode: none\n"); + p9_debug(P9_DEBUG_9P, "Cache mode: none\n"); } else - printk(KERN_INFO "9p: Unknown Cache mode %s.\n", s); + pr_info("Unknown Cache mode %s\n", s); return version; } @@ -140,8 +142,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) case Opt_debug: r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); ret = r; continue; } @@ -154,8 +156,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) case Opt_dfltuid: r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); ret = r; continue; } @@ -164,8 +166,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) case Opt_dfltgid: r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + 
p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); ret = r; continue; } @@ -174,8 +176,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) case Opt_afid: r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); ret = r; continue; } @@ -205,8 +207,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) s = match_strdup(&args[0]); if (!s) { ret = -ENOMEM; - P9_DPRINTK(P9_DEBUG_ERROR, - "problem allocating copy of cache arg\n"); + p9_debug(P9_DEBUG_ERROR, + "problem allocating copy of cache arg\n"); goto free_and_return; } ret = get_cache_mode(s); @@ -223,8 +225,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) s = match_strdup(&args[0]); if (!s) { ret = -ENOMEM; - P9_DPRINTK(P9_DEBUG_ERROR, - "problem allocating copy of access arg\n"); + p9_debug(P9_DEBUG_ERROR, + "problem allocating copy of access arg\n"); goto free_and_return; } @@ -240,8 +242,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) v9ses->uid = simple_strtoul(s, &e, 10); if (*e != '\0') { ret = -EINVAL; - printk(KERN_INFO "9p: Unknown access " - "argument %s.\n", s); + pr_info("Unknown access argument %s\n", + s); kfree(s); goto free_and_return; } @@ -254,9 +256,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) #ifdef CONFIG_9P_FS_POSIX_ACL v9ses->flags |= V9FS_POSIX_ACL; #else - P9_DPRINTK(P9_DEBUG_ERROR, - "Not defined CONFIG_9P_FS_POSIX_ACL. " - "Ignoring posixacl option\n"); + p9_debug(P9_DEBUG_ERROR, + "Not defined CONFIG_9P_FS_POSIX_ACL. Ignoring posixacl option\n"); #endif break; @@ -318,7 +319,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, if (IS_ERR(v9ses->clnt)) { retval = PTR_ERR(v9ses->clnt); v9ses->clnt = NULL; - P9_DPRINTK(P9_DEBUG_ERROR, "problem initializing 9p client\n"); + p9_debug(P9_DEBUG_ERROR, "problem initializing 9p client\n"); goto error; } @@ -371,7 +372,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, if (IS_ERR(fid)) { retval = PTR_ERR(fid); fid = NULL; - P9_DPRINTK(P9_DEBUG_ERROR, "cannot attach\n"); + p9_debug(P9_DEBUG_ERROR, "cannot attach\n"); goto error; } @@ -429,7 +430,7 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) */ void v9fs_session_cancel(struct v9fs_session_info *v9ses) { - P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); + p9_debug(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); p9_client_disconnect(v9ses->clnt); } @@ -442,7 +443,7 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses) { void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses) { - P9_DPRINTK(P9_DEBUG_ERROR, "begin cancel session %p\n", v9ses); + p9_debug(P9_DEBUG_ERROR, "begin cancel session %p\n", v9ses); p9_client_begin_disconnect(v9ses->clnt); } @@ -591,23 +592,23 @@ static void v9fs_cache_unregister(void) static int __init init_v9fs(void) { int err; - printk(KERN_INFO "Installing v9fs 9p2000 file system support\n"); + pr_info("Installing v9fs 9p2000 file system support\n"); /* TODO: Setup list of registered trasnport modules */ err = register_filesystem(&v9fs_fs_type); if (err < 0) { - printk(KERN_ERR "Failed to register filesystem\n"); + pr_err("Failed to register filesystem\n"); return err; } err = v9fs_cache_register(); if (err < 0) { - printk(KERN_ERR "Failed to register v9fs for caching\n"); + pr_err("Failed to register v9fs for caching\n"); goto 
out_fs_unreg; } err = v9fs_sysfs_init(); if (err < 0) { - printk(KERN_ERR "Failed to register with sysfs\n"); + pr_err("Failed to register with sysfs\n"); goto out_sysfs_cleanup; } diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 2524e4cbb8e..0ad61c6a65a 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -56,7 +56,7 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) struct inode *inode; inode = page->mapping->host; - P9_DPRINTK(P9_DEBUG_VFS, "\n"); + p9_debug(P9_DEBUG_VFS, "\n"); BUG_ON(!PageLocked(page)); @@ -116,14 +116,14 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping, struct inode *inode; inode = mapping->host; - P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp); + p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp); ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages); if (ret == 0) return ret; ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp); - P9_DPRINTK(P9_DEBUG_VFS, " = %d\n", ret); + p9_debug(P9_DEBUG_VFS, " = %d\n", ret); return ret; } @@ -263,10 +263,9 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * Now that we do caching with cache mode enabled, We need * to support direct IO */ - P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) " - "off/no(%lld/%lu) EINVAL\n", - iocb->ki_filp->f_path.dentry->d_name.name, - (long long) pos, nr_segs); + p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n", + iocb->ki_filp->f_path.dentry->d_name.name, + (long long)pos, nr_segs); return -EINVAL; } diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index e022890c6f4..d529437ff44 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -53,8 +53,8 @@ static int v9fs_dentry_delete(const struct dentry *dentry) { - P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, - dentry); + p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", + dentry->d_name.name, dentry); return 1; } @@ -66,8 +66,8 @@ static int v9fs_dentry_delete(const struct dentry *dentry) */ static int v9fs_cached_dentry_delete(const struct dentry *dentry) { - P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", - dentry->d_name.name, dentry); + p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", + dentry->d_name.name, dentry); /* Don't cache negative dentries */ if (!dentry->d_inode) @@ -86,8 +86,8 @@ static void v9fs_dentry_release(struct dentry *dentry) struct v9fs_dentry *dent; struct p9_fid *temp, *current_fid; - P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, - dentry); + p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", + dentry->d_name.name, dentry); dent = dentry->d_fsdata; if (dent) { list_for_each_entry_safe(current_fid, temp, &dent->fidlist, diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 598fff1a54e..ff911e77965 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -140,7 +140,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) int reclen = 0; struct p9_rdir *rdir; - P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); fid = filp->private_data; buflen = fid->clnt->msize - P9_IOHDRSZ; @@ -168,7 +168,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) err = p9stat_read(fid->clnt, rdir->buf + rdir->head, rdir->tail - rdir->head, &st); if (err) { - P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); + p9_debug(P9_DEBUG_VFS, "returned %d\n", err); err = -EIO; 
p9stat_free(&st); goto unlock_and_exit; @@ -213,7 +213,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, struct p9_dirent curdirent; u64 oldoffset = 0; - P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); fid = filp->private_data; buflen = fid->clnt->msize - P9_READDIRHDRSZ; @@ -244,7 +244,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, rdir->tail - rdir->head, &curdirent); if (err < 0) { - P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); + p9_debug(P9_DEBUG_VFS, "returned %d\n", err); err = -EIO; goto unlock_and_exit; } @@ -290,9 +290,8 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) struct p9_fid *fid; fid = filp->private_data; - P9_DPRINTK(P9_DEBUG_VFS, - "v9fs_dir_release: inode: %p filp: %p fid: %d\n", - inode, filp, fid ? fid->fid : -1); + p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n", + inode, filp, fid ? fid->fid : -1); if (fid) p9_client_clunk(fid); return 0; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index a7ac45d5bd2..fc06fd27065 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -61,7 +61,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) struct p9_fid *fid; int omode; - P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); + p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); v9inode = V9FS_I(inode); v9ses = v9fs_inode2v9ses(inode); if (v9fs_proto_dotl(v9ses)) @@ -135,7 +135,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) int res = 0; struct inode *inode = filp->f_path.dentry->d_inode; - P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); + p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); /* No mandatory locks */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) @@ -305,8 +305,8 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl) struct inode *inode = filp->f_path.dentry->d_inode; int ret = -ENOLCK; - P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp, - cmd, fl, filp->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", + filp, cmd, fl, filp->f_path.dentry->d_name.name); /* No mandatory locks */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) @@ -341,8 +341,8 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd, struct inode *inode = filp->f_path.dentry->d_inode; int ret = -ENOLCK; - P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp, - cmd, fl, filp->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", + filp, cmd, fl, filp->f_path.dentry->d_name.name); /* No mandatory locks */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) @@ -385,8 +385,8 @@ v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count, { int n, total, size; - P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, - (long long unsigned) offset, count); + p9_debug(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", + fid->fid, (long long unsigned)offset, count); n = 0; total = 0; size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; @@ -444,7 +444,7 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count, struct p9_fid *fid; size_t size; - P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset); + p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset); fid = filp->private_data; size = fid->iounit ? 
fid->iounit : fid->clnt->msize - P9_IOHDRSZ; @@ -471,8 +471,8 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid, loff_t origin = *offset; unsigned long pg_start, pg_end; - P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, - (int)count, (int)*offset); + p9_debug(P9_DEBUG_VFS, "data %p count %d offset %x\n", + data, (int)count, (int)*offset); clnt = fid->clnt; do { @@ -553,7 +553,7 @@ static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end, return retval; mutex_lock(&inode->i_mutex); - P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); + p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); fid = filp->private_data; v9fs_blank_wstat(&wstat); @@ -576,8 +576,7 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, return retval; mutex_lock(&inode->i_mutex); - P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n", - filp, datasync); + p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); fid = filp->private_data; @@ -608,8 +607,8 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = filp->f_path.dentry->d_inode; - P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n", - page, (unsigned long)filp->private_data); + p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", + page, (unsigned long)filp->private_data); v9inode = V9FS_I(inode); /* make sure the cache has finished storing the page */ diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 879ed885173..85c29733d8b 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -23,6 +23,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -133,9 +135,8 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, res |= S_IFBLK; break; default: - P9_DPRINTK(P9_DEBUG_ERROR, - "Unknown special type %c %s\n", type, - stat->extension); + p9_debug(P9_DEBUG_ERROR, "Unknown special type %c %s\n", + type, stat->extension); }; *rdev = MKDEV(major, minor); } else @@ -281,8 +282,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, } else if (v9fs_proto_dotu(v9ses)) { inode->i_op = &v9fs_file_inode_operations; } else { - P9_DPRINTK(P9_DEBUG_ERROR, - "special files without extended mode\n"); + p9_debug(P9_DEBUG_ERROR, + "special files without extended mode\n"); err = -EINVAL; goto error; } @@ -307,8 +308,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, break; case S_IFLNK: if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { - P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with " - "legacy protocol.\n"); + p9_debug(P9_DEBUG_ERROR, + "extended modes used with legacy protocol\n"); err = -EINVAL; goto error; } @@ -335,8 +336,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, break; default: - P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", - mode, mode & S_IFMT); + p9_debug(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", + mode, mode & S_IFMT); err = -EINVAL; goto error; } @@ -358,11 +359,12 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev) struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; - P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); + p9_debug(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); inode = new_inode(sb); if (!inode) { - P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); + pr_warn("%s (%d): Problem allocating inode\n", + __func__, task_pid_nr(current)); return ERR_PTR(-ENOMEM); } err = v9fs_init_inode(v9ses, inode, mode, rdev); @@ -578,15 +580,15 @@ static int 
v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) struct p9_fid *v9fid, *dfid; struct v9fs_session_info *v9ses; - P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n", - dir, dentry, flags); + p9_debug(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n", + dir, dentry, flags); v9ses = v9fs_inode2v9ses(dir); inode = dentry->d_inode; dfid = v9fs_fid_lookup(dentry->d_parent); if (IS_ERR(dfid)) { retval = PTR_ERR(dfid); - P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", retval); + p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", retval); return retval; } if (v9fs_proto_dotl(v9ses)) @@ -635,7 +637,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, struct p9_fid *dfid, *ofid, *fid; struct inode *inode; - P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); err = 0; ofid = NULL; @@ -644,7 +646,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, dfid = v9fs_fid_lookup(dentry->d_parent); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); - P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); return ERR_PTR(err); } @@ -652,13 +654,13 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, ofid = p9_client_walk(dfid, 0, NULL, 1); if (IS_ERR(ofid)) { err = PTR_ERR(ofid); - P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); return ERR_PTR(err); } err = p9_client_fcreate(ofid, name, perm, mode, extension); if (err < 0) { - P9_DPRINTK(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err); goto error; } @@ -666,7 +668,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); - P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); fid = NULL; goto error; } @@ -675,7 +677,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); - P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } err = v9fs_fid_add(dentry, fid); @@ -793,7 +795,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct p9_fid *fid; struct v9fs_session_info *v9ses; - P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); err = 0; v9ses = v9fs_inode2v9ses(dir); perm = unixmode2p9mode(v9ses, mode | S_IFDIR); @@ -831,8 +833,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, char *name; int result = 0; - P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", - dir, dentry->d_name.name, dentry, nameidata); + p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", + dir, dentry->d_name.name, dentry, nameidata); if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); @@ -938,7 +940,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct p9_fid *newdirfid; struct p9_wstat wstat; - P9_DPRINTK(P9_DEBUG_VFS, "\n"); + p9_debug(P9_DEBUG_VFS, "\n"); retval = 0; old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; @@ -974,8 +976,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, * 9P .u 
can only handle file rename in the same directory */ - P9_DPRINTK(P9_DEBUG_ERROR, - "old dir and new dir are different\n"); + p9_debug(P9_DEBUG_ERROR, "old dir and new dir are different\n"); retval = -EXDEV; goto clunk_newdir; } @@ -1031,7 +1032,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct p9_fid *fid; struct p9_wstat *st; - P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); + p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); err = -EPERM; v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { @@ -1068,7 +1069,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) struct p9_fid *fid; struct p9_wstat wstat; - P9_DPRINTK(P9_DEBUG_VFS, "\n"); + p9_debug(P9_DEBUG_VFS, "\n"); retval = inode_change_ok(dentry->d_inode, iattr); if (retval) return retval; @@ -1213,7 +1214,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) struct p9_fid *fid; struct p9_wstat *st; - P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); retval = -EPERM; v9ses = v9fs_dentry2v9ses(dentry); fid = v9fs_fid_lookup(dentry); @@ -1235,8 +1236,8 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) /* copy extension buffer into buffer */ strncpy(buffer, st->extension, buflen); - P9_DPRINTK(P9_DEBUG_VFS, - "%s -> %s (%s)\n", dentry->d_name.name, st->extension, buffer); + p9_debug(P9_DEBUG_VFS, "%s -> %s (%s)\n", + dentry->d_name.name, st->extension, buffer); retval = strnlen(buffer, buflen); done: @@ -1257,7 +1258,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) int len = 0; char *link = __getname(); - P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name); if (!link) link = ERR_PTR(-ENOMEM); @@ -1288,8 +1289,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { char *s = nd_get_link(nd); - P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, - IS_ERR(s) ? "" : s); + p9_debug(P9_DEBUG_VFS, " %s %s\n", + dentry->d_name.name, IS_ERR(s) ? 
"" : s); if (!IS_ERR(s)) __putname(s); } @@ -1312,7 +1313,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, v9ses = v9fs_inode2v9ses(dir); if (!v9fs_proto_dotu(v9ses)) { - P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n"); + p9_debug(P9_DEBUG_ERROR, "not extended\n"); return -EPERM; } @@ -1340,8 +1341,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, static int v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, - dentry->d_name.name, symname); + p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n", + dir->i_ino, dentry->d_name.name, symname); return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname); } @@ -1362,9 +1363,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, char *name; struct p9_fid *oldfid; - P9_DPRINTK(P9_DEBUG_VFS, - " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, - old_dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n", + dir->i_ino, dentry->d_name.name, old_dentry->d_name.name); oldfid = v9fs_fid_clone(old_dentry); if (IS_ERR(oldfid)) @@ -1403,9 +1403,9 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) int retval; char *name; - P9_DPRINTK(P9_DEBUG_VFS, - " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, - dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); + p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", + dir->i_ino, dentry->d_name.name, mode, + MAJOR(rdev), MINOR(rdev)); if (!new_valid_dev(rdev)) return -EINVAL; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 0b5745e2194..73488fb69d3 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -283,13 +283,13 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, } name = (char *) dentry->d_name.name; - P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x " - "mode:0x%x\n", name, flags, omode); + p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%x\n", + name, flags, omode); dfid = v9fs_fid_lookup(dentry->d_parent); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); - P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); return err; } @@ -297,7 +297,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, ofid = p9_client_walk(dfid, 0, NULL, 1); if (IS_ERR(ofid)) { err = PTR_ERR(ofid); - P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); return err; } @@ -307,16 +307,15 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { - P9_DPRINTK(P9_DEBUG_VFS, - "Failed to get acl values in creat %d\n", err); + p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n", + err); goto error; } err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags), mode, gid, &qid); if (err < 0) { - P9_DPRINTK(P9_DEBUG_VFS, - "p9_client_open_dotl failed in creat %d\n", - err); + p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n", + err); goto error; } v9fs_invalidate_inode_attr(dir); @@ -325,14 +324,14 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); - P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); fid = NULL; goto error; 
} inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); - P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } err = v9fs_fid_add(dentry, fid); @@ -408,7 +407,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dir_dentry; struct posix_acl *dacl = NULL, *pacl = NULL; - P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); err = 0; v9ses = v9fs_inode2v9ses(dir); @@ -420,7 +419,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); - P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); dfid = NULL; goto error; } @@ -430,8 +429,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { - P9_DPRINTK(P9_DEBUG_VFS, - "Failed to get acl values in mkdir %d\n", err); + p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mkdir %d\n", + err); goto error; } name = (char *) dentry->d_name.name; @@ -444,8 +443,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); - P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", - err); + p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", + err); fid = NULL; goto error; } @@ -453,8 +452,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); - P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", - err); + p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", + err); goto error; } err = v9fs_fid_add(dentry, fid); @@ -495,7 +494,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, struct p9_fid *fid; struct p9_stat_dotl *st; - P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); + p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); err = -EPERM; v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { @@ -537,7 +536,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) struct p9_fid *fid; struct p9_iattr_dotl p9attr; - P9_DPRINTK(P9_DEBUG_VFS, "\n"); + p9_debug(P9_DEBUG_VFS, "\n"); retval = inode_change_ok(dentry->d_inode, iattr); if (retval) @@ -670,14 +669,13 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, struct v9fs_session_info *v9ses; name = (char *) dentry->d_name.name; - P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n", - dir->i_ino, name, symname); + p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname); v9ses = v9fs_inode2v9ses(dir); dfid = v9fs_fid_lookup(dentry->d_parent); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); - P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); return err; } @@ -687,7 +685,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid); if (err < 0) { - P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); goto error; } @@ -697,8 +695,8 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); - 
P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", - err); + p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", + err); fid = NULL; goto error; } @@ -707,8 +705,8 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); - P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", - err); + p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", + err); goto error; } err = v9fs_fid_add(dentry, fid); @@ -751,9 +749,8 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, struct p9_fid *dfid, *oldfid; struct v9fs_session_info *v9ses; - P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", - dir->i_ino, old_dentry->d_name.name, - dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", + dir->i_ino, old_dentry->d_name.name, dentry->d_name.name); v9ses = v9fs_inode2v9ses(dir); dir_dentry = v9fs_dentry_from_dir_inode(dir); @@ -770,7 +767,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name); if (err < 0) { - P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); return err; } @@ -813,9 +810,9 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, struct dentry *dir_dentry; struct posix_acl *dacl = NULL, *pacl = NULL; - P9_DPRINTK(P9_DEBUG_VFS, - " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, - dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev)); + p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", + dir->i_ino, dentry->d_name.name, omode, + MAJOR(rdev), MINOR(rdev)); if (!new_valid_dev(rdev)) return -EINVAL; @@ -825,7 +822,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); - P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); dfid = NULL; goto error; } @@ -835,8 +832,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { - P9_DPRINTK(P9_DEBUG_VFS, - "Failed to get acl values in mknod %d\n", err); + p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mknod %d\n", + err); goto error; } name = (char *) dentry->d_name.name; @@ -851,8 +848,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); - P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", - err); + p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", + err); fid = NULL; goto error; } @@ -860,8 +857,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); - P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", - err); + p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", + err); goto error; } err = v9fs_fid_add(dentry, fid); @@ -905,7 +902,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd) char *link = __getname(); char *target; - P9_DPRINTK(P9_DEBUG_VFS, "%s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name); if (!link) { link = ERR_PTR(-ENOMEM); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 
c70251d47ed..06d1973598a 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -121,7 +121,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, struct p9_fid *fid; int retval = 0; - P9_DPRINTK(P9_DEBUG_VFS, " \n"); + p9_debug(P9_DEBUG_VFS, "\n"); v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); if (!v9ses) @@ -191,7 +191,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, goto release_sb; v9fs_fid_add(root, fid); - P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); + p9_debug(P9_DEBUG_VFS, " simple set mount, return 0\n"); return dget(sb->s_root); clunk_fid: @@ -223,7 +223,7 @@ static void v9fs_kill_super(struct super_block *s) { struct v9fs_session_info *v9ses = s->s_fs_info; - P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); + p9_debug(P9_DEBUG_VFS, " %p\n", s); kill_anon_super(s); @@ -231,7 +231,7 @@ static void v9fs_kill_super(struct super_block *s) v9fs_session_close(v9ses); kfree(v9ses); s->s_fs_info = NULL; - P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n"); + p9_debug(P9_DEBUG_VFS, "exiting kill_super\n"); } static void @@ -303,7 +303,7 @@ static int v9fs_write_inode(struct inode *inode, * send an fsync request to server irrespective of * wbc->sync_mode. */ - P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); + p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); v9inode = V9FS_I(inode); if (!v9inode->writeback_fid) return 0; @@ -326,7 +326,7 @@ static int v9fs_write_inode_dotl(struct inode *inode, * send an fsync request to server irrespective of * wbc->sync_mode. */ - P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); + p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); v9inode = V9FS_I(inode); if (!v9inode->writeback_fid) return 0; diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index d288773871b..29653b70a9c 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -32,8 +32,8 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name, attr_fid = p9_client_xattrwalk(fid, name, &attr_size); if (IS_ERR(attr_fid)) { retval = PTR_ERR(attr_fid); - P9_DPRINTK(P9_DEBUG_VFS, - "p9_client_attrwalk failed %zd\n", retval); + p9_debug(P9_DEBUG_VFS, "p9_client_attrwalk failed %zd\n", + retval); attr_fid = NULL; goto error; } @@ -87,8 +87,8 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name, { struct p9_fid *fid; - P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n", - __func__, name, buffer_size); + p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu\n", + name, buffer_size); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); @@ -115,8 +115,8 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name, int retval, msize, write_count; struct p9_fid *fid = NULL; - P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu flags = %d\n", - __func__, name, value_len, flags); + p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n", + name, value_len, flags); fid = v9fs_fid_clone(dentry); if (IS_ERR(fid)) { @@ -129,8 +129,8 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name, */ retval = p9_client_xattrcreate(fid, name, value_len, flags); if (retval < 0) { - P9_DPRINTK(P9_DEBUG_VFS, - "p9_client_xattrcreate failed %d\n", retval); + p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n", + retval); goto error; } msize = fid->clnt->msize; diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 2d70b95b3b5..7184853ca36 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -63,30 +63,16 @@ enum p9_debug_flags { #ifdef 
CONFIG_NET_9P_DEBUG extern unsigned int p9_debug_level; - -#define P9_DPRINTK(level, format, arg...) \ -do { \ - if ((p9_debug_level & level) == level) {\ - if (level == P9_DEBUG_9P) \ - printk(KERN_NOTICE "(%8.8d) " \ - format , task_pid_nr(current) , ## arg); \ - else \ - printk(KERN_NOTICE "-- %s (%d): " \ - format , __func__, task_pid_nr(current) , ## arg); \ - } \ -} while (0) - +__printf(3, 4) +void _p9_debug(enum p9_debug_flags level, const char *func, + const char *fmt, ...); +#define p9_debug(level, fmt, ...) \ + _p9_debug(level, __func__, fmt, ##__VA_ARGS__) #else -#define P9_DPRINTK(level, format, arg...) do { } while (0) +#define p9_debug(level, fmt, ...) \ + no_printk(fmt, ##__VA_ARGS__) #endif - -#define P9_EPRINTK(level, format, arg...) \ -do { \ - printk(level "9p: %s (%d): " \ - format , __func__, task_pid_nr(current), ## arg); \ -} while (0) - /** * enum p9_msg_t - 9P message types * @P9_TLERROR: not used diff --git a/net/9p/client.c b/net/9p/client.c index 854ca7a911c..776618cd2be 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -23,6 +23,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -81,15 +83,15 @@ static int get_protocol_version(char *s) if (!strcmp(s, "9p2000")) { version = p9_proto_legacy; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n"); + p9_debug(P9_DEBUG_9P, "Protocol version: Legacy\n"); } else if (!strcmp(s, "9p2000.u")) { version = p9_proto_2000u; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); + p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); } else if (!strcmp(s, "9p2000.L")) { version = p9_proto_2000L; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); + p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); } else - printk(KERN_INFO "9p: Unknown protocol version %s.\n", s); + pr_info("Unknown protocol version %s\n", s); return version; } @@ -119,8 +121,8 @@ static int parse_opts(char *opts, struct p9_client *clnt) tmp_options = kstrdup(opts, GFP_KERNEL); if (!tmp_options) { - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy of option string\n"); + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; @@ -134,8 +136,8 @@ static int parse_opts(char *opts, struct p9_client *clnt) case Opt_msize: r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); ret = r; continue; } @@ -145,15 +147,14 @@ static int parse_opts(char *opts, struct p9_client *clnt) s = match_strdup(&args[0]); if (!s) { ret = -ENOMEM; - P9_DPRINTK(P9_DEBUG_ERROR, - "problem allocating copy of trans arg\n"); + p9_debug(P9_DEBUG_ERROR, + "problem allocating copy of trans arg\n"); goto free_and_return; } clnt->trans_mod = v9fs_get_trans_by_name(s); if (clnt->trans_mod == NULL) { - printk(KERN_INFO - "9p: Could not find " - "request transport: %s\n", s); + pr_info("Could not find request transport: %s\n", + s); ret = -EINVAL; kfree(s); goto free_and_return; @@ -167,8 +168,8 @@ static int parse_opts(char *opts, struct p9_client *clnt) s = match_strdup(&args[0]); if (!s) { ret = -ENOMEM; - P9_DPRINTK(P9_DEBUG_ERROR, - "problem allocating copy of version arg\n"); + p9_debug(P9_DEBUG_ERROR, + "problem allocating copy of version arg\n"); goto free_and_return; } ret = get_protocol_version(s); @@ -225,7 +226,7 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) sizeof(struct p9_req_t), GFP_ATOMIC); if (!c->reqs[row]) { - 
printk(KERN_ERR "Couldn't grow tag array\n"); + pr_err("Couldn't grow tag array\n"); spin_unlock_irqrestore(&c->lock, flags); return ERR_PTR(-ENOMEM); } @@ -244,7 +245,7 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) if (!req->tc) { req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); if (!req->wq) { - printk(KERN_ERR "Couldn't grow tag array\n"); + pr_err("Couldn't grow tag array\n"); return ERR_PTR(-ENOMEM); } init_waitqueue_head(req->wq); @@ -253,7 +254,7 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); if ((!req->tc) || (!req->rc)) { - printk(KERN_ERR "Couldn't grow tag array\n"); + pr_err("Couldn't grow tag array\n"); kfree(req->tc); kfree(req->rc); kfree(req->wq); @@ -343,9 +344,9 @@ static void p9_tag_cleanup(struct p9_client *c) for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { for (col = 0; col < P9_ROW_MAXTAG; col++) { if (c->reqs[row][col].status != REQ_STATUS_IDLE) { - P9_DPRINTK(P9_DEBUG_MUX, - "Attempting to cleanup non-free tag %d,%d\n", - row, col); + p9_debug(P9_DEBUG_MUX, + "Attempting to cleanup non-free tag %d,%d\n", + row, col); /* TODO: delay execution of cleanup */ return; } @@ -379,7 +380,7 @@ static void p9_tag_cleanup(struct p9_client *c) static void p9_free_req(struct p9_client *c, struct p9_req_t *r) { int tag = r->tc->tag; - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); + p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); r->status = REQ_STATUS_IDLE; if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) @@ -394,9 +395,9 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) */ void p9_client_cb(struct p9_client *c, struct p9_req_t *req) { - P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); wake_up(req->wq); - P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); + p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); } EXPORT_SYMBOL(p9_client_cb); @@ -431,8 +432,8 @@ p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag, pdu->id = r_type; pdu->tag = r_tag; - P9_DPRINTK(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size, - pdu->id, pdu->tag); + p9_debug(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", + pdu->size, pdu->id, pdu->tag); if (type) *type = r_type; @@ -473,7 +474,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) */ trace_9p_protocol_dump(c, req->rc); if (err) { - P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); return err; } if (type != P9_RERROR && type != P9_RLERROR) @@ -492,21 +493,21 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (!err || !IS_ERR_VALUE(err)) { err = p9_errstr2errno(ename, strlen(ename)); - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", - -ecode, ename); + p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); } kfree(ename); } else { err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); err = -ecode; - P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); } return err; out_err: - P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); return err; } @@ -538,7 +539,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, */ trace_9p_protocol_dump(c, req->rc); if (err) { - P9_DPRINTK(P9_DEBUG_ERROR, 
"couldn't parse header %d\n", err); + p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); return err; } @@ -601,22 +602,22 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, if (!err || !IS_ERR_VALUE(err)) { err = p9_errstr2errno(ename, strlen(ename)); - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", - -ecode, ename); + p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); } kfree(ename); } else { err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); err = -ecode; - P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); } return err; out_free: kfree(ename); out_err: - P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); return err; } @@ -645,7 +646,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (err) return err; - P9_DPRINTK(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); + p9_debug(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag); if (IS_ERR(req)) @@ -670,7 +671,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, int tag, err; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); + p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type); /* we allow for any status other than disconnected */ if (c->status == Disconnected) @@ -744,11 +745,11 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) req->status >= REQ_STATUS_RCVD); if (req->status == REQ_STATUS_ERROR) { - P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } if ((err == -ERESTARTSYS) && (c->status == Connected)) { - P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); + p9_debug(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; clear_thread_flag(TIF_SIGPENDING); @@ -827,11 +828,11 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, goto reterr; } if (req->status == REQ_STATUS_ERROR) { - P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } if ((err == -ERESTARTSYS) && (c->status == Connected)) { - P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); + p9_debug(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; clear_thread_flag(TIF_SIGPENDING); @@ -865,7 +866,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) struct p9_fid *fid; unsigned long flags; - P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt); fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); if (!fid) return ERR_PTR(-ENOMEM); @@ -898,7 +899,7 @@ static void p9_fid_destroy(struct p9_fid *fid) struct p9_client *clnt; unsigned long flags; - P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid); + p9_debug(P9_DEBUG_FID, "fid %d\n", fid->fid); clnt = fid->clnt; p9_idpool_put(fid->fid, clnt->fidpool); spin_lock_irqsave(&clnt->lock, flags); @@ -915,8 +916,8 @@ static int p9_client_version(struct p9_client *c) char *version; int msize; - P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", - c->msize, c->proto_version); + p9_debug(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", + c->msize, c->proto_version); switch (c->proto_version) { case p9_proto_2000L: @@ -941,12 +942,12 @@ static int p9_client_version(struct p9_client *c) err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); if (err) { - P9_DPRINTK(P9_DEBUG_9P, 
"version error %d\n", err); + p9_debug(P9_DEBUG_9P, "version error %d\n", err); trace_9p_protocol_dump(c, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); + p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); if (!strncmp(version, "9P2000.L", 8)) c->proto_version = p9_proto_2000L; else if (!strncmp(version, "9P2000.u", 8)) @@ -996,8 +997,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (clnt->trans_mod == NULL) { err = -EPROTONOSUPPORT; - P9_DPRINTK(P9_DEBUG_ERROR, - "No transport defined or default transport\n"); + p9_debug(P9_DEBUG_ERROR, + "No transport defined or default transport\n"); goto destroy_tagpool; } @@ -1007,8 +1008,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto put_trans; } - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", - clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); + p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", + clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); err = clnt->trans_mod->create(clnt, dev_name, options); if (err) @@ -1041,7 +1042,7 @@ void p9_client_destroy(struct p9_client *clnt) { struct p9_fid *fid, *fidptr; - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_MUX, "clnt %p\n", clnt); if (clnt->trans_mod) clnt->trans_mod->close(clnt); @@ -1049,7 +1050,7 @@ void p9_client_destroy(struct p9_client *clnt) v9fs_put_trans(clnt->trans_mod); list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) { - printk(KERN_INFO "Found fid %d not clunked\n", fid->fid); + pr_info("Found fid %d not clunked\n", fid->fid); p9_fid_destroy(fid); } @@ -1064,14 +1065,14 @@ EXPORT_SYMBOL(p9_client_destroy); void p9_client_disconnect(struct p9_client *clnt) { - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt); clnt->status = Disconnected; } EXPORT_SYMBOL(p9_client_disconnect); void p9_client_begin_disconnect(struct p9_client *clnt) { - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt); clnt->status = BeginDisconnect; } EXPORT_SYMBOL(p9_client_begin_disconnect); @@ -1085,8 +1086,8 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, struct p9_qid qid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", - afid ? afid->fid : -1, uname, aname); + p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", + afid ? afid->fid : -1, uname, aname); fid = p9_fid_create(clnt); if (IS_ERR(fid)) { err = PTR_ERR(fid); @@ -1108,10 +1109,8 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", - qid.type, - (unsigned long long)qid.path, - qid.version); + p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", + qid.type, (unsigned long long)qid.path, qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -1151,8 +1150,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, fid = oldfid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", - oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); + p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", + oldfid->fid, fid->fid, nwname, wnames ? 
wnames[0] : NULL); req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, nwname, wnames); @@ -1169,7 +1168,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, } p9_free_req(clnt, req); - P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); + p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); if (nwqids != nwname) { err = -ENOENT; @@ -1177,7 +1176,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, } for (count = 0; count < nwqids; count++) - P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", + p9_debug(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", count, wqids[count].type, (unsigned long long)wqids[count].path, wqids[count].version); @@ -1212,7 +1211,7 @@ int p9_client_open(struct p9_fid *fid, int mode) int iounit; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", + p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode); err = 0; @@ -1234,7 +1233,7 @@ int p9_client_open(struct p9_fid *fid, int mode) goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", + p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type, (unsigned long long)qid.path, qid.version, iounit); @@ -1256,7 +1255,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, struct p9_req_t *req; int iounit; - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", ofid->fid, name, flags, mode, gid); clnt = ofid->clnt; @@ -1277,7 +1276,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", + p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", qid->type, (unsigned long long)qid->path, qid->version, iounit); @@ -1301,7 +1300,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, struct p9_qid qid; int iounit; - P9_DPRINTK(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", + p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", fid->fid, name, perm, mode); err = 0; clnt = fid->clnt; @@ -1322,7 +1321,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", + p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", qid.type, (unsigned long long)qid.path, qid.version, iounit); @@ -1344,7 +1343,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", + p9_debug(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", dfid->fid, name, symtgt); clnt = dfid->clnt; @@ -1361,7 +1360,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", + p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", qid->type, (unsigned long long)qid->path, qid->version); free_and_error: @@ -1376,7 +1375,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", + p9_debug(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", dfid->fid, oldfid->fid, newname); clnt = dfid->clnt; req 
= p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid, @@ -1384,7 +1383,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) if (IS_ERR(req)) return PTR_ERR(req); - P9_DPRINTK(P9_DEBUG_9P, "<<< RLINK\n"); + p9_debug(P9_DEBUG_9P, "<<< RLINK\n"); p9_free_req(clnt, req); return 0; } @@ -1396,7 +1395,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync) struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", + p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", fid->fid, datasync); err = 0; clnt = fid->clnt; @@ -1407,7 +1406,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); p9_free_req(clnt, req); @@ -1423,12 +1422,13 @@ int p9_client_clunk(struct p9_fid *fid) struct p9_req_t *req; if (!fid) { - P9_EPRINTK(KERN_WARNING, "Trying to clunk with NULL fid\n"); + pr_warn("%s (%d): Trying to clunk with NULL fid\n", + __func__, task_pid_nr(current)); dump_stack(); return 0; } - P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); err = 0; clnt = fid->clnt; @@ -1438,7 +1438,7 @@ int p9_client_clunk(struct p9_fid *fid) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); p9_free_req(clnt, req); error: @@ -1456,7 +1456,7 @@ int p9_client_remove(struct p9_fid *fid) struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); err = 0; clnt = fid->clnt; @@ -1466,7 +1466,7 @@ int p9_client_remove(struct p9_fid *fid) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); p9_free_req(clnt, req); error: @@ -1481,7 +1481,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) struct p9_req_t *req; struct p9_client *clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", + p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", dfid->fid, name, flags); clnt = dfid->clnt; @@ -1490,7 +1490,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) err = PTR_ERR(req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name); + p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name); p9_free_req(clnt, req); error: @@ -1509,7 +1509,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, int err, rsize, non_zc = 0; - P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", + p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, (long long unsigned) offset, count); err = 0; clnt = fid->clnt; @@ -1552,7 +1552,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); if (non_zc) { if (data) { @@ -1584,7 +1584,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", fid->fid, (long long unsigned) offset, count); err = 0; clnt = fid->clnt; @@ -1626,7 +1626,7 @@ 
p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); p9_free_req(clnt, req); return count; @@ -1646,7 +1646,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) struct p9_req_t *req; u16 ignored; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); if (!ret) return ERR_PTR(-ENOMEM); @@ -1667,7 +1667,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) goto error; } - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n" "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" @@ -1696,7 +1696,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, GFP_KERNEL); struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", + p9_debug(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", fid->fid, request_mask); if (!ret) @@ -1718,7 +1718,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, goto error; } - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, "<<< RGETATTR st_result_mask=%lld\n" "<<< qid=%x.%llx.%x\n" "<<< st_mode=%8.8x st_nlink=%llu\n" @@ -1784,8 +1784,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) err = 0; clnt = fid->clnt; wst->size = p9_client_statsize(wst, clnt->proto_version); - P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" @@ -1802,7 +1802,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); p9_free_req(clnt, req); error: @@ -1818,8 +1818,8 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, " valid=%x mode=%x uid=%d gid=%d size=%lld\n" " atime_sec=%lld atime_nsec=%lld\n" " mtime_sec=%lld mtime_nsec=%lld\n", @@ -1833,7 +1833,7 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) err = PTR_ERR(req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); p9_free_req(clnt, req); error: return err; @@ -1849,7 +1849,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid); req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid); if (IS_ERR(req)) { @@ -1866,7 +1866,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld " + p9_debug(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld " "blocks %llu bfree %llu bavail %llu files %llu ffree %llu " "fsid %llu namelen %ld\n", fid->fid, (long unsigned int)sb->type, (long int)sb->bsize, @@ -1889,7 +1889,7 @@ int p9_client_rename(struct p9_fid *fid, err = 0; clnt = 
fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", + p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", fid->fid, newdirfid->fid, name); req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid, @@ -1899,7 +1899,7 @@ int p9_client_rename(struct p9_fid *fid, goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); p9_free_req(clnt, req); error: @@ -1917,7 +1917,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, err = 0; clnt = olddirfid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s" + p9_debug(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s" " newdirfid %d new name %s\n", olddirfid->fid, old_name, newdirfid->fid, new_name); @@ -1928,7 +1928,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", + p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", newdirfid->fid, new_name); p9_free_req(clnt, req); @@ -1956,7 +1956,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, attr_fid = NULL; goto error; } - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n", file_fid->fid, attr_fid->fid, attr_name); @@ -1973,7 +1973,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, goto clunk_fid; } p9_free_req(clnt, req); - P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", + p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", attr_fid->fid, *attr_size); return attr_fid; clunk_fid: @@ -1994,7 +1994,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name, struct p9_req_t *req; struct p9_client *clnt; - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TXATTRCREATE fid %d name %s size %lld flag %d\n", fid->fid, name, (long long)attr_size, flags); err = 0; @@ -2005,7 +2005,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name, err = PTR_ERR(req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); p9_free_req(clnt, req); error: return err; @@ -2019,7 +2019,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) struct p9_req_t *req; char *dataptr; - P9_DPRINTK(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", + p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", fid->fid, (long long unsigned) offset, count); err = 0; @@ -2056,7 +2056,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); if (non_zc) memmove(data, dataptr, count); @@ -2080,7 +2080,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " + p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev), gid); @@ -2092,7 +2092,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, + p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid 
%x.%llx.%x\n", qid->type, (unsigned long long)qid->path, qid->version); error: @@ -2111,7 +2111,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", + p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", fid->fid, name, mode, gid); req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode, gid); @@ -2123,7 +2123,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, + p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, (unsigned long long)qid->path, qid->version); error: @@ -2141,7 +2141,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d " + p9_debug(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d " "start %lld length %lld proc_id %d client_id %s\n", fid->fid, flock->type, flock->flags, flock->start, flock->length, flock->proc_id, flock->client_id); @@ -2158,7 +2158,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); + p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); error: p9_free_req(clnt, req); return err; @@ -2174,7 +2174,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld " + p9_debug(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld " "length %lld proc_id %d client_id %s\n", fid->fid, glock->type, glock->start, glock->length, glock->proc_id, glock->client_id); @@ -2191,7 +2191,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " + p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " "proc_id %d client_id %s\n", glock->type, glock->start, glock->length, glock->proc_id, glock->client_id); error: @@ -2208,7 +2208,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid); req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid); if (IS_ERR(req)) @@ -2219,7 +2219,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target) trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); + p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); error: p9_free_req(clnt, req); return err; diff --git a/net/9p/error.c b/net/9p/error.c index 52518512a93..2ab2de76010 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -27,6 +27,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -237,8 +239,8 @@ int p9_errstr2errno(char *errstr, int len) if (errno == 0) { /* TODO: if error isn't found, add it dynamically */ errstr[len] = 0; - printk(KERN_ERR "%s: server reported unknown error %s\n", - __func__, errstr); + pr_err("%s: server reported unknown error %s\n", + __func__, errstr); errno = ESERVERFAULT; } diff --git a/net/9p/mod.c b/net/9p/mod.c index 2664d129229..6ab36aea772 100644 --- a/net/9p/mod.c +++ 
b/net/9p/mod.c @@ -24,7 +24,11 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include +#include +#include #include #include #include @@ -39,6 +43,29 @@ unsigned int p9_debug_level = 0; /* feature-rific global debug level */ EXPORT_SYMBOL(p9_debug_level); module_param_named(debug, p9_debug_level, uint, 0); MODULE_PARM_DESC(debug, "9P debugging level"); + +void _p9_debug(enum p9_debug_flags level, const char *func, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if ((p9_debug_level & level) != level) + return; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (level == P9_DEBUG_9P) + pr_notice("(%8.8d) %pV", task_pid_nr(current), &vaf); + else + pr_notice("-- %s (%d): %pV", func, task_pid_nr(current), &vaf); + + va_end(args); +} +EXPORT_SYMBOL(_p9_debug); #endif /* @@ -147,7 +174,7 @@ static int __init init_p9(void) int ret = 0; p9_error_init(); - printk(KERN_INFO "Installing 9P2000 support\n"); + pr_info("Installing 9P2000 support\n"); p9_trans_fd_init(); return ret; @@ -160,7 +187,7 @@ static int __init init_p9(void) static void __exit exit_p9(void) { - printk(KERN_INFO "Unloading 9P2000 support\n"); + pr_info("Unloading 9P2000 support\n"); p9_trans_fd_exit(); } diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 55e10a96c90..9ee48cb3017 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -534,7 +534,7 @@ int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st); if (ret) { - P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); + p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); trace_9p_protocol_dump(clnt, &fake_pdu); } @@ -558,8 +558,8 @@ int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu) pdu->size = size; trace_9p_protocol_dump(clnt, pdu); - P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, - pdu->id, pdu->tag); + p9_debug(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", + pdu->size, pdu->id, pdu->tag); return err; } @@ -585,7 +585,7 @@ int p9dirent_read(struct p9_client *clnt, char *buf, int len, ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid, &dirent->d_off, &dirent->d_type, &nameptr); if (ret) { - P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); + p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); trace_9p_protocol_dump(clnt, &fake_pdu); goto out; } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index fdfdb5747f6..fccae26fa67 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -25,6 +25,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -191,7 +193,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) unsigned long flags; LIST_HEAD(cancel_list); - P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); + p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); spin_lock_irqsave(&m->client->lock, flags); @@ -217,7 +219,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) spin_unlock_irqrestore(&m->client->lock, flags); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req); + p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); p9_client_cb(m->client, req); } @@ -275,7 +277,7 @@ static int p9_fd_read(struct p9_client *client, void *v, int len) return -EREMOTEIO; if (!(ts->rd->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n"); + p9_debug(P9_DEBUG_ERROR, "blocking read ...\n"); ret 
= kernel_read(ts->rd, ts->rd->f_pos, v, len); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) @@ -299,7 +301,7 @@ static void p9_read_work(struct work_struct *work) if (m->err < 0) return; - P9_DPRINTK(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); + p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); if (!m->rbuf) { m->rbuf = m->tmp_buf; @@ -308,11 +310,11 @@ static void p9_read_work(struct work_struct *work) } clear_bit(Rpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", m, - m->rpos, m->rsize, m->rsize-m->rpos); + p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", + m, m->rpos, m->rsize, m->rsize-m->rpos); err = p9_fd_read(m->client, m->rbuf + m->rpos, m->rsize - m->rpos); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); + p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); if (err == -EAGAIN) { clear_bit(Rworksched, &m->wsched); return; @@ -325,25 +327,25 @@ static void p9_read_work(struct work_struct *work) if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */ u16 tag; - P9_DPRINTK(P9_DEBUG_TRANS, "got new header\n"); + p9_debug(P9_DEBUG_TRANS, "got new header\n"); n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */ if (n >= m->client->msize) { - P9_DPRINTK(P9_DEBUG_ERROR, - "requested packet size too big: %d\n", n); + p9_debug(P9_DEBUG_ERROR, + "requested packet size too big: %d\n", n); err = -EIO; goto error; } tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */ - P9_DPRINTK(P9_DEBUG_TRANS, - "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); + p9_debug(P9_DEBUG_TRANS, + "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); m->req = p9_tag_lookup(m->client, tag); if (!m->req || (m->req->status != REQ_STATUS_SENT && m->req->status != REQ_STATUS_FLSH)) { - P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", - tag); + p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", + tag); err = -EIO; goto error; } @@ -364,7 +366,7 @@ static void p9_read_work(struct work_struct *work) /* not an else because some packets (like clunk) have no payload */ if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ - P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n"); + p9_debug(P9_DEBUG_TRANS, "got new packet\n"); spin_lock(&m->client->lock); if (m->req->status != REQ_STATUS_ERROR) m->req->status = REQ_STATUS_RCVD; @@ -384,7 +386,7 @@ static void p9_read_work(struct work_struct *work) n = p9_fd_poll(m->client, NULL); if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); + p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } else clear_bit(Rworksched, &m->wsched); @@ -418,7 +420,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len) return -EREMOTEIO; if (!(ts->wr->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n"); + p9_debug(P9_DEBUG_ERROR, "blocking write ...\n"); oldfs = get_fs(); set_fs(get_ds()); @@ -460,7 +462,7 @@ static void p9_write_work(struct work_struct *work) req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); req->status = REQ_STATUS_SENT; - P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req); + p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc->sdata; @@ -469,11 +471,11 @@ static void p9_write_work(struct work_struct *work) spin_unlock(&m->client->lock); } - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, - m->wsize); + p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", + m, 
m->wpos, m->wsize); clear_bit(Wpending, &m->wsched); err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); + p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); if (err == -EAGAIN) { clear_bit(Wworksched, &m->wsched); return; @@ -497,7 +499,7 @@ static void p9_write_work(struct work_struct *work) n = p9_fd_poll(m->client, NULL); if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); + p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } else clear_bit(Wworksched, &m->wsched); @@ -551,7 +553,7 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) } if (!pwait) { - P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); + p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n"); return; } @@ -573,8 +575,7 @@ static struct p9_conn *p9_conn_create(struct p9_client *client) int n; struct p9_conn *m; - P9_DPRINTK(P9_DEBUG_TRANS, "client %p msize %d\n", client, - client->msize); + p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize); m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); if (!m) return ERR_PTR(-ENOMEM); @@ -591,12 +592,12 @@ static struct p9_conn *p9_conn_create(struct p9_client *client) n = p9_fd_poll(client, &m->pt); if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); set_bit(Rpending, &m->wsched); } if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); set_bit(Wpending, &m->wsched); } @@ -618,7 +619,7 @@ static void p9_poll_mux(struct p9_conn *m) n = p9_fd_poll(m->client, NULL); if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { - P9_DPRINTK(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); + p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); if (n >= 0) n = -ECONNRESET; p9_conn_cancel(m, n); @@ -626,19 +627,19 @@ static void p9_poll_mux(struct p9_conn *m) if (n & POLLIN) { set_bit(Rpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); + p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } if (n & POLLOUT) { set_bit(Wpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); if ((m->wsize || !list_empty(&m->unsent_req_list)) && !test_and_set_bit(Wworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); + p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } @@ -661,8 +662,8 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) struct p9_trans_fd *ts = client->trans; struct p9_conn *m = ts->conn; - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, - current, req->tc, req->tc->id); + p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", + m, current, req->tc, req->tc->id); if (m->err < 0) return m->err; @@ -686,7 +687,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { int ret = 1; - P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req); + p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&client->lock); @@ -726,8 +727,8 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) tmp_options = kstrdup(params, GFP_KERNEL); 
if (!tmp_options) { - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy of option string\n"); + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; @@ -741,8 +742,8 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (token != Opt_err) { r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); continue; } } @@ -801,7 +802,8 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) csocket->sk->sk_allocation = GFP_NOIO; fd = sock_map_fd(csocket, 0); if (fd < 0) { - P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); + pr_err("%s (%d): failed to map fd\n", + __func__, task_pid_nr(current)); sock_release(csocket); kfree(p); return fd; @@ -837,8 +839,8 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) static void p9_conn_destroy(struct p9_conn *m) { - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, - m->mux_list.prev, m->mux_list.next); + p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", + m, m->mux_list.prev, m->mux_list.next); p9_mux_poll_stop(m); cancel_work_sync(&m->rq); @@ -919,7 +921,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { - P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); + pr_err("%s (%d): problem creating socket\n", + __func__, task_pid_nr(current)); return err; } @@ -927,9 +930,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_tcp: problem connecting socket to %s\n", - addr); + pr_err("%s (%d): problem connecting socket to %s\n", + __func__, task_pid_nr(current), addr); sock_release(csocket); return err; } @@ -947,8 +949,8 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) csocket = NULL; if (strlen(addr) >= UNIX_PATH_MAX) { - P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", - addr); + pr_err("%s (%d): address too long: %s\n", + __func__, task_pid_nr(current), addr); return -ENAMETOOLONG; } @@ -957,15 +959,16 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_UNIX, SOCK_STREAM, 0, &csocket, 1); if (err < 0) { - P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n"); + pr_err("%s (%d): problem creating socket\n", + __func__, task_pid_nr(current)); + return err; } err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, sizeof(struct sockaddr_un) - 1, 0); if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_unix: problem connecting socket: %s: %d\n", - addr, err); + pr_err("%s (%d): problem connecting socket: %s: %d\n", + __func__, task_pid_nr(current), addr, err); sock_release(csocket); return err; } @@ -983,7 +986,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) parse_opts(args, &opts); if (opts.rfd == ~0 || opts.wfd == ~0) { - printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); + pr_err("Insufficient options for proto=fd\n"); return -ENOPROTOOPT; } @@ -1050,7 +1053,7 @@ static void p9_poll_workfn(struct work_struct *work) { unsigned long flags; - P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); + p9_debug(P9_DEBUG_TRANS, "start %p\n", 
current); spin_lock_irqsave(&p9_poll_lock, flags); while (!list_empty(&p9_poll_pending_list)) { @@ -1066,7 +1069,7 @@ static void p9_poll_workfn(struct work_struct *work) } spin_unlock_irqrestore(&p9_poll_lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); + p9_debug(P9_DEBUG_TRANS, "finish\n"); } int p9_trans_fd_init(void) diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 159c50f1c6b..2c69ddd691a 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -26,6 +26,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -178,8 +180,8 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) tmp_options = kstrdup(params, GFP_KERNEL); if (!tmp_options) { - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy of option string\n"); + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; @@ -192,8 +194,8 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) token = match_token(p, tokens, args); r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); continue; } switch (token) { @@ -301,8 +303,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, return; err_out: - P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n", - req, err, status); + p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status); rdma->state = P9_RDMA_FLUSHING; client->status = Disconnected; } @@ -318,8 +319,8 @@ handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, static void qp_event_handler(struct ib_event *event, void *context) { - P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event, - context); + p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n", + event->event, context); } static void cq_comp_handler(struct ib_cq *cq, void *cq_context) @@ -345,8 +346,7 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context) break; default: - printk(KERN_ERR "9prdma: unexpected completion type, " - "c->wc_op=%d, wc.opcode=%d, status=%d\n", + pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n", c->wc_op, wc.opcode, wc.status); break; } @@ -356,7 +356,7 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context) static void cq_event_handler(struct ib_event *e, void *v) { - P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); + p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); } static void rdma_destroy_trans(struct p9_trans_rdma *rdma) @@ -407,7 +407,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) return ib_post_recv(rdma->qp, &wr, &bad_wr); error: - P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); + p9_debug(P9_DEBUG_ERROR, "EIO\n"); return -EIO; } @@ -500,7 +500,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) kfree(c); kfree(rpl_context->rc); kfree(rpl_context); - P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); + p9_debug(P9_DEBUG_ERROR, "EIO\n"); return -EIO; err_free1: kfree(rpl_context->rc); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 32aa9834229..330421e5471 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -26,6 +26,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -145,7 +147,7 @@ static void req_done(struct virtqueue *vq) struct p9_req_t *req; unsigned long flags; - P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); + p9_debug(P9_DEBUG_TRANS, ": 
request done\n"); while (1) { spin_lock_irqsave(&chan->lock, flags); @@ -158,8 +160,8 @@ static void req_done(struct virtqueue *vq) spin_unlock_irqrestore(&chan->lock, flags); /* Wakeup if anyone waiting for VirtIO ring space. */ wake_up(chan->vc_wq); - P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); - P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); + p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc); + p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); req->status = REQ_STATUS_RCVD; p9_client_cb(chan->client, req); @@ -257,7 +259,7 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) unsigned long flags; struct virtio_chan *chan = client->trans; - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); + p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); req->status = REQ_STATUS_SENT; req_retry: @@ -280,20 +282,19 @@ req_retry: if (err == -ERESTARTSYS) return err; - P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); + p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); goto req_retry; } else { spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, - "9p debug: " - "virtio rpc add_buf returned failure"); + p9_debug(P9_DEBUG_TRANS, + "virtio rpc add_buf returned failure\n"); return -EIO; } } virtqueue_kick(chan->vq); spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); + p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); return 0; } @@ -354,7 +355,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, struct page **in_pages = NULL, **out_pages = NULL; struct virtio_chan *chan = client->trans; - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); + p9_debug(P9_DEBUG_TRANS, "virtio request\n"); if (uodata) { out_nr_pages = p9_nr_pages(uodata, outlen); @@ -423,20 +424,19 @@ req_retry_pinned: if (err == -ERESTARTSYS) goto err_out; - P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); + p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); goto req_retry_pinned; } else { spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, - "9p debug: " - "virtio rpc add_buf returned failure"); + p9_debug(P9_DEBUG_TRANS, + "virtio rpc add_buf returned failure\n"); err = -EIO; goto err_out; } } virtqueue_kick(chan->vq); spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); + p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); err = wait_event_interruptible(*req->wq, req->status >= REQ_STATUS_RCVD); /* @@ -491,7 +491,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); if (!chan) { - printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n"); + pr_err("Failed to allocate virtio 9P channel\n"); err = -ENOMEM; goto fail; } @@ -592,7 +592,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) mutex_unlock(&virtio_9p_lock); if (!found) { - printk(KERN_ERR "9p: no channels available\n"); + pr_err("no channels available\n"); return ret; } diff --git a/net/9p/util.c b/net/9p/util.c index 9c1c9348ac3..6ceeeb384de 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -106,7 +106,7 @@ retry: else if (error) return -1; - P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p); + p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p); return i; } EXPORT_SYMBOL(p9_idpool_get); @@ -124,7 +124,7 @@ void p9_idpool_put(int id, struct p9_idpool *p) { unsigned long flags; - P9_DPRINTK(P9_DEBUG_MUX, " 
id %d pool %p\n", id, p); + p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", id, p); spin_lock_irqsave(&p->lock, flags); idr_remove(&p->pool, id); -- cgit v1.2.3 From df345c674b6366952a21a67604c8f6d489bb7ea7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Jan 2012 13:39:49 +0530 Subject: fs/9p: v9fs_stat2inode should update suid/sgid bits. Create a new helper that updates the permission bits and use that, instead of opencoding the logic. Reported and bisected by: M. Mohan Kumar Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 85c29733d8b..cf3dd6bc537 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -94,6 +94,32 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode) return res; } +/** + * p9mode2perm- convert plan9 mode bits to unix permission bits + * @v9ses: v9fs session information + * @stat: p9_wstat from which mode need to be derived + * + */ +static int p9mode2perm(struct v9fs_session_info *v9ses, + struct p9_wstat *stat) +{ + int res; + int mode = stat->mode; + + res = mode & S_IALLUGO; + if (v9fs_proto_dotu(v9ses)) { + if ((mode & P9_DMSETUID) == P9_DMSETUID) + res |= S_ISUID; + + if ((mode & P9_DMSETGID) == P9_DMSETGID) + res |= S_ISGID; + + if ((mode & P9_DMSETVTX) == P9_DMSETVTX) + res |= S_ISVTX; + } + return res; +} + /** * p9mode2unixmode- convert plan9 mode bits to unix mode bits * @v9ses: v9fs session information @@ -107,8 +133,8 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int res; int mode = stat->mode; - res = mode & S_IALLUGO; *rdev = 0; + res = p9mode2perm(v9ses, stat); if ((mode & P9_DMDIR) == P9_DMDIR) res |= S_IFDIR; @@ -142,16 +168,6 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, } else res |= S_IFREG; - if (v9fs_proto_dotu(v9ses)) { - if ((mode & P9_DMSETUID) == P9_DMSETUID) - res |= S_ISUID; - - if ((mode & P9_DMSETGID) == P9_DMSETGID) - res |= S_ISGID; - - if ((mode & P9_DMSETVTX) == P9_DMSETVTX) - res |= S_ISVTX; - } return res; } @@ -1168,7 +1184,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, set_nlink(inode, i_nlink); } } - mode = stat->mode & S_IALLUGO; + mode = p9mode2perm(v9ses, stat); mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; i_size_write(inode, stat->length); -- cgit v1.2.3 From b6054793069bf08fcf220fff5fb33735d5493594 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Jan 2012 10:42:17 -0600 Subject: fs/9p: We should not allocate a new inode when creating hardlinks. Don't do new_inode_from_fid in case of hardlink creation. This ensures that the link count for hardlink files gets updated properly. Earlier, the link count was not updated on removing a hardlink with cache mode enabled.
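As a rough standalone illustration of the problem (a toy model only, not the kernel patch itself; the toy_inode and helper names below are hypothetical), allocating a fresh in-memory inode for the new name leaves the original inode's i_nlink untouched, which is exactly the miscount this change avoids by skipping the walk/instantiate step for the P9_DMLINK case:

#include <stdio.h>

struct toy_inode { int i_nlink; };

/* Wrong model: build a brand-new inode for the hardlink name; the
 * original inode's i_nlink never moves, so a later unlink under-counts. */
static struct toy_inode new_inode_for_link(const struct toy_inode *target)
{
        struct toy_inode fresh = { .i_nlink = 1 };
        (void)target;
        return fresh;
}

/* The idea behind the patch: for a hardlink, reuse the target inode so
 * its link count is the one that gets bumped and later dropped. */
static struct toy_inode *reuse_inode_for_link(struct toy_inode *target)
{
        target->i_nlink++;
        return target;
}

int main(void)
{
        struct toy_inode file = { .i_nlink = 1 };

        new_inode_for_link(&file);
        printf("fresh inode: original i_nlink = %d (stale)\n", file.i_nlink);

        reuse_inode_for_link(&file);
        printf("reused inode: i_nlink = %d\n", file.i_nlink);
        return 0;
}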
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index cf3dd6bc537..c8fe480d0db 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -680,26 +680,31 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, goto error; } - /* now walk from the parent so we can get unopened fid */ - fid = p9_client_walk(dfid, 1, &name, 1); - if (IS_ERR(fid)) { - err = PTR_ERR(fid); - p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); - fid = NULL; - goto error; - } - - /* instantiate inode and assign the unopened fid to the dentry */ - inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); - goto error; + if (!(perm & P9_DMLINK)) { + /* now walk from the parent so we can get unopened fid */ + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + p9_debug(P9_DEBUG_VFS, + "p9_client_walk failed %d\n", err); + fid = NULL; + goto error; + } + /* + * instantiate inode and assign the unopened fid to the dentry + */ + inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + p9_debug(P9_DEBUG_VFS, + "inode creation failed %d\n", err); + goto error; + } + err = v9fs_fid_add(dentry, fid); + if (err < 0) + goto error; + d_instantiate(dentry, inode); } - err = v9fs_fid_add(dentry, fid); - if (err < 0) - goto error; - d_instantiate(dentry, inode); return ofid; error: if (ofid) -- cgit v1.2.3 From 0aaaf5c424c7ffd6b0c4253251356558b16ef3a2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 Dec 2011 16:13:48 -0500 Subject: NFS: Cache state owners after files are closed Servers have a finite amount of memory to store NFSv4 open and lock owners. Moreover, servers may have a difficult time determining when they can reap their state owner table, thanks to gray areas in the NFSv4 protocol specification. Thus clients should be careful to reuse state owners when possible. Currently Linux is not too careful. When a user has closed all her files on one mount point, the state owner's reference count goes to zero, and it is released. The next OPEN allocates a new one. A workload that serially opens and closes files can run through a large number of open owners this way. When a state owner's reference count goes to zero, slap it onto a free list for that nfs_server, with an expiry time. Garbage collect before looking for a state owner. This makes state owners for active users available for re-use. Now that there can be unused state owners remaining at umount time, purge the state owner free list when a server is destroyed. Also be sure not to reclaim unused state owners during state recovery. This change has benefits for the client as well. For some workloads, this approach drops the number of OPEN_CONFIRM calls from the same as the number of OPEN calls, down to just one. This reduces wire traffic and thus open(2) latency. Before this patch, untarring a kernel source tarball shows the OPEN_CONFIRM call counter steadily increasing through the test. With the patch, the OPEN_CONFIRM count remains at 1 throughout the entire untar. As long as the expiry time is kept short, I don't think garbage collection should be terribly expensive, although it does bounce the clp->cl_lock around a bit. 
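The free-list-with-expiry scheme described above can be sketched in a few lines of standalone C (a simplified model, not the actual fs/nfs code; struct owner and the helper names are hypothetical, and the real implementation keys the cutoff to the lease time while holding clp->cl_lock): released owners are appended to the tail with a timestamp, so the oldest entry is always at the head, and the collector stops at the first entry that is still young.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct owner {
        struct owner *next;
        time_t expires;         /* when it went onto the free list */
        int id;
};

static struct owner *lru_head, **lru_tail = &lru_head;

/* Release: append to the LRU tail so the oldest entry stays at the head. */
static void owner_release(struct owner *o)
{
        o->expires = time(NULL);
        o->next = NULL;
        *lru_tail = o;
        lru_tail = &o->next;
}

/* Garbage collect: reap entries at least lease_time old; the list is
 * sorted by insertion time, so stop at the first entry still in lease. */
static void owner_gc(time_t lease_time)
{
        time_t now = time(NULL);

        while (lru_head && now - lru_head->expires >= lease_time) {
                struct owner *o = lru_head;

                lru_head = o->next;
                if (!lru_head)
                        lru_tail = &lru_head;
                printf("reaping owner %d\n", o->id);
                free(o);
        }
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct owner *o = calloc(1, sizeof(*o));
                o->id = i;
                owner_release(o);
        }
        owner_gc(0);    /* lease time 0: everything on the list is stale */
        return 0;
}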
[ At some point we should rationalize the use of the nfs_server ->destroy method. ] Signed-off-by: Chuck Lever [Trond: Fixed a garbage collection race and a few efficiency issues] Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 8 +++++ fs/nfs/nfs4_fs.h | 3 ++ fs/nfs/nfs4state.c | 89 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/nfs_fs_sb.h | 1 + 4 files changed, 92 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 32ea37198e9..41bd67f80d3 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -250,6 +250,11 @@ static void pnfs_init_server(struct nfs_server *server) rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); } +static void nfs4_destroy_server(struct nfs_server *server) +{ + nfs4_purge_state_owners(server); +} + #else static void nfs4_shutdown_client(struct nfs_client *clp) { @@ -1065,6 +1070,7 @@ static struct nfs_server *nfs_alloc_server(void) INIT_LIST_HEAD(&server->master_link); INIT_LIST_HEAD(&server->delegations); INIT_LIST_HEAD(&server->layouts); + INIT_LIST_HEAD(&server->state_owners_lru); atomic_set(&server->active, 0); @@ -1538,6 +1544,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, nfs_server_insert_lists(server); server->mount_time = jiffies; + server->destroy = nfs4_destroy_server; out: nfs_free_fattr(fattr); return error; @@ -1719,6 +1726,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, /* Copy data from the source */ server->nfs_client = source->nfs_client; + server->destroy = source->destroy; atomic_inc(&server->nfs_client->cl_count); nfs_server_copy_userdata(server, source); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 693ae22f873..4d7d0aedc10 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -94,6 +94,8 @@ struct nfs_unique_id { struct nfs4_state_owner { struct nfs_unique_id so_owner_id; struct nfs_server *so_server; + struct list_head so_lru; + unsigned long so_expires; struct rb_node so_server_node; struct rpc_cred *so_cred; /* Associated cred */ @@ -319,6 +321,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); +extern void nfs4_purge_state_owners(struct nfs_server *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, fmode_t); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6354e4fcc82..a53f33b4ac3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "callback.h" @@ -388,6 +389,8 @@ nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) else if (cred > sp->so_cred) p = &parent->rb_right; else { + if (!list_empty(&sp->so_lru)) + list_del_init(&sp->so_lru); atomic_inc(&sp->so_count); return sp; } @@ -412,6 +415,8 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) else if (new->so_cred > sp->so_cred) p = &parent->rb_right; else { + if (!list_empty(&sp->so_lru)) + list_del_init(&sp->so_lru); atomic_inc(&sp->so_count); return sp; } @@ -453,6 +458,7 @@ nfs4_alloc_state_owner(void) spin_lock_init(&sp->so_sequence.lock); INIT_LIST_HEAD(&sp->so_sequence.list); atomic_set(&sp->so_count, 1); + INIT_LIST_HEAD(&sp->so_lru); return sp; } @@ -470,6 +476,38 @@ 
nfs4_drop_state_owner(struct nfs4_state_owner *sp) } } +static void nfs4_free_state_owner(struct nfs4_state_owner *sp) +{ + rpc_destroy_wait_queue(&sp->so_sequence.wait); + put_rpccred(sp->so_cred); + kfree(sp); +} + +static void nfs4_gc_state_owners(struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_state_owner *sp, *tmp; + unsigned long time_min, time_max; + LIST_HEAD(doomed); + + spin_lock(&clp->cl_lock); + time_max = jiffies; + time_min = (long)time_max - (long)clp->cl_lease_time; + list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) { + /* NB: LRU is sorted so that oldest is at the head */ + if (time_in_range(sp->so_expires, time_min, time_max)) + break; + list_move(&sp->so_lru, &doomed); + nfs4_remove_state_owner_locked(sp); + } + spin_unlock(&clp->cl_lock); + + list_for_each_entry_safe(sp, tmp, &doomed, so_lru) { + list_del(&sp->so_lru); + nfs4_free_state_owner(sp); + } +} + /** * nfs4_get_state_owner - Look up a state owner given a credential * @server: nfs_server to search @@ -487,10 +525,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, sp = nfs4_find_state_owner_locked(server, cred); spin_unlock(&clp->cl_lock); if (sp != NULL) - return sp; + goto out; new = nfs4_alloc_state_owner(); if (new == NULL) - return NULL; + goto out; new->so_server = server; new->so_cred = cred; spin_lock(&clp->cl_lock); @@ -502,26 +540,58 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, rpc_destroy_wait_queue(&new->so_sequence.wait); kfree(new); } +out: + nfs4_gc_state_owners(server); return sp; } /** * nfs4_put_state_owner - Release a nfs4_state_owner * @sp: state owner data to release - * */ void nfs4_put_state_owner(struct nfs4_state_owner *sp) { - struct nfs_client *clp = sp->so_server->nfs_client; - struct rpc_cred *cred = sp->so_cred; + struct nfs_server *server = sp->so_server; + struct nfs_client *clp = server->nfs_client; if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) return; - nfs4_remove_state_owner_locked(sp); + + if (!RB_EMPTY_NODE(&sp->so_server_node)) { + sp->so_expires = jiffies; + list_add_tail(&sp->so_lru, &server->state_owners_lru); + spin_unlock(&clp->cl_lock); + } else { + nfs4_remove_state_owner_locked(sp); + spin_unlock(&clp->cl_lock); + nfs4_free_state_owner(sp); + } +} + +/** + * nfs4_purge_state_owners - Release all cached state owners + * @server: nfs_server with cached state owners to release + * + * Called at umount time. Remaining state owners will be on + * the LRU with ref count of zero. 
+ */ +void nfs4_purge_state_owners(struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_state_owner *sp, *tmp; + LIST_HEAD(doomed); + + spin_lock(&clp->cl_lock); + list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) { + list_move(&sp->so_lru, &doomed); + nfs4_remove_state_owner_locked(sp); + } spin_unlock(&clp->cl_lock); - rpc_destroy_wait_queue(&sp->so_sequence.wait); - put_rpccred(cred); - kfree(sp); + + list_for_each_entry_safe(sp, tmp, &doomed, so_lru) { + list_del(&sp->so_lru); + nfs4_free_state_owner(sp); + } } static struct nfs4_state * @@ -1393,6 +1463,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov restart: rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + nfs4_purge_state_owners(server); spin_lock(&clp->cl_lock); for (pos = rb_first(&server->state_owners); pos != NULL; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b5479df8378..ba4d7656ecf 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -153,6 +153,7 @@ struct nfs_server { struct rb_root openowner_id; struct rb_root lockowner_id; #endif + struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; void (*destroy)(struct nfs_server *); -- cgit v1.2.3 From b8548894bde94ccee836e210274ff401225e9733 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jan 2012 17:49:12 -0500 Subject: nfsd4: be forgiving in the absence of the recovery directory If the recovery directory doesn't exist, then behavior after a reboot will be suboptimal. But it's unnecessarily harsh to then prevent the nfsv4 server from working at all. Instead just print a warning (already done in nfsd4_init_recdir()) and soldier on. Tested-by: Lior Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 28712e20bb1..eb01fffdc2f 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -127,10 +127,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); - if (!rec_file || clp->cl_firststate) + if (clp->cl_firststate) return 0; - clp->cl_firststate = 1; + if (!rec_file) + return -ENOENT; status = nfs4_save_creds(&original_cred); if (status < 0) return status; -- cgit v1.2.3 From 9b4146e85536ebd6b989f43906986e34486efdba Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Jan 2012 16:26:43 -0500 Subject: NFSD: Change name of extended attribute containing junction As of fedfs-utils-0.8.0, user space stores all NFS junction information in a single extended attribute: "trusted.junction.nfs". Both FedFS and NFS basic junctions are stored in this one attribute, and the intention is that all future forms of NFS junction metadata will be stored in this attribute. Other protocols may use a different extended attribute. Thus NFSD needs to look only for that one extended attribute. The "trusted.junction.type" xattr is deprecated. fedfs-utils-0.8.0 will continue to attach a "trusted.junction.type" xattr to junctions, but future fedfs-utils releases may no longer do that. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/vfs.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7a2e442623c..89689130621 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -594,8 +594,19 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac return error; } -#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction." -#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type" +/* + * NFS junction information is stored in an extended attribute. + */ +#define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs" + +/** + * nfsd4_is_junction - Test if an object could be an NFS junction + * + * @dentry: object to test + * + * Returns 1 if "dentry" appears to contain NFS junction information. + * Otherwise 0 is returned. + */ int nfsd4_is_junction(struct dentry *dentry) { struct inode *inode = dentry->d_inode; @@ -606,7 +617,7 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; if (!(inode->i_mode & S_ISVTX)) return 0; - if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0) + if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0) return 0; return 1; } -- cgit v1.2.3 From 7a6ef8c72314f254c107c6a9ed7cb201961ee05a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 5 Jan 2012 15:38:41 -0500 Subject: nfsd4: nfsd4_create_clid_dir return value is unused Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 10 ++++------ fs/nfsd/state.h | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index eb01fffdc2f..a52f267f122 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -117,8 +117,7 @@ out_no_tfm: return status; } -int -nfsd4_create_clid_dir(struct nfs4_client *clp) +void nfsd4_create_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; char *dname = clp->cl_recdir; @@ -128,13 +127,13 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); if (clp->cl_firststate) - return 0; + return; clp->cl_firststate = 1; if (!rec_file) - return -ENOENT; + return; status = nfs4_save_creds(&original_cred); if (status < 0) - return status; + return; dir = rec_file->f_path.dentry; /* lock the parent */ @@ -172,7 +171,6 @@ out_unlock: " and is writeable", status, user_recovery_dirname); nfs4_reset_creds(original_cred); - return status; } typedef int (recdir_func)(struct dentry *, struct dentry *); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 89c2cd84d79..ffb5df1db94 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -483,7 +483,7 @@ extern void nfsd4_shutdown_recdir(void); extern int nfs4_client_to_reclaim(const char *name); extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); extern void nfsd4_recdir_purge_old(void); -extern int nfsd4_create_clid_dir(struct nfs4_client *clp); +extern void nfsd4_create_clid_dir(struct nfs4_client *clp); extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); extern void release_session_client(struct nfsd4_session *); extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); -- cgit v1.2.3 From 69f594a38967f4540ce7a29b3fd214e68a8330bd Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 3 Jan 2012 12:25:15 -0500 Subject: ptrace: do not audit capability check when outputing /proc/pid/stat Reading /proc/pid/stat of another process checks if one has ptrace permissions on that process. 
If one does have permissions it outputs some data about the process which might have security and attack implications. If the current task does not have ptrace permissions the read still works, but those fields are filled with inocuous (0) values. Since this check and a subsequent denial is not a violation of the security policy we should not audit such denials. This can be quite useful to removing ptrace broadly across a system without flooding the logs when ps is run or something which harmlessly walks proc. Signed-off-by: Eric Paris Acked-by: Serge E. Hallyn --- fs/proc/array.c | 2 +- include/linux/ptrace.h | 5 +++-- kernel/ptrace.c | 12 ++++++++++-- security/selinux/hooks.c | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index 3a1dafd228d..ddffd7a88b9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -380,7 +380,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, state = *get_task_state(task); vsize = eip = esp = 0; - permitted = ptrace_may_access(task, PTRACE_MODE_READ); + permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT); mm = get_task_mm(task); if (mm) { vsize = task_vsize(mm); diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 800f113bea6..a27e56ca41a 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -127,8 +127,9 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer); -#define PTRACE_MODE_READ 1 -#define PTRACE_MODE_ATTACH 2 +#define PTRACE_MODE_READ 0x01 +#define PTRACE_MODE_ATTACH 0x02 +#define PTRACE_MODE_NOAUDIT 0x04 /* Returns 0 on success, -errno on denial. */ extern int __ptrace_may_access(struct task_struct *task, unsigned int mode); /* Returns true on success, false on denial. 
*/ diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 210bbf045ee..c890ac9a796 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -161,6 +161,14 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state) return ret; } +static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) +{ + if (mode & PTRACE_MODE_NOAUDIT) + return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE); + else + return has_ns_capability(current, ns, CAP_SYS_PTRACE); +} + int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; @@ -187,7 +195,7 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) cred->gid == tcred->sgid && cred->gid == tcred->gid)) goto ok; - if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE)) + if (ptrace_has_cap(tcred->user->user_ns, mode)) goto ok; rcu_read_unlock(); return -EPERM; @@ -196,7 +204,7 @@ ok: smp_rmb(); if (task->mm) dumpable = get_dumpable(task->mm); - if (!dumpable && !ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) + if (!dumpable && !ptrace_has_cap(task_user_ns(task), mode)) return -EPERM; return security_ptrace_access_check(task, mode); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index c9605c4a2e0..14f94cd29c8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1809,7 +1809,7 @@ static int selinux_ptrace_access_check(struct task_struct *child, if (rc) return rc; - if (mode == PTRACE_MODE_READ) { + if (mode & PTRACE_MODE_READ) { u32 sid = current_sid(); u32 csid = task_sid(child); return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL); -- cgit v1.2.3 From 5c0b4129c07b902b27d3f3ebc087757f534a3abd Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Fri, 6 Jan 2012 09:28:12 +0200 Subject: pnfs-obj: pNFS errors are communicated on iodata->pnfs_error Some time along the way pNFS IO errors were switched to communicate with a special iodata->pnfs_error member instead of the regular RPC members. But objlayout was not switched over. Fix that! Without this fix any IO error is hanged, because IO is not switched to MDS and pages are never cleared or read. [Applies to 3.2.0. Same bug different patch for 3.1/0 Kernels] CC: Stable Tree Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objlayout.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 72074e3a04f..b3c29039f5b 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -254,6 +254,8 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) oir->status = rdata->task.tk_status = status; if (status >= 0) rdata->res.count = status; + else + rdata->pnfs_error = status; objlayout_iodone(oir); /* must not use oir after this point */ @@ -334,6 +336,8 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) if (status >= 0) { wdata->res.count = status; wdata->verf.committed = oir->committed; + } else { + wdata->pnfs_error = status; } objlayout_iodone(oir); /* must not use oir after this point */ -- cgit v1.2.3 From fe0fe83585f88346557868a803a479dfaaa0688a Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Fri, 6 Jan 2012 09:31:20 +0200 Subject: pnfs-obj: Must return layout on IO error As mandated by the standard. In case of an IO error, a pNFS objects layout driver must return it's layout. This is because all device errors are reported to the server as part of the layout return buffer. 
This is implemented the same way PNFS_LAYOUTRET_ON_SETATTR is done, through a bit flag on the pnfs_layoutdriver_type->flags member. The flag is set by the layout driver that wants a layout_return preformed at pnfs_ld_{write,read}_done in case of an error. (Though I have not defined a wrapper like pnfs_ld_layoutret_on_setattr because this code is never called outside of pnfs.c and pnfs IO paths) Without this patch 3.[0-2] Kernels leak memory and have an annoying WARN_ON after every IO error utilizing the pnfs-obj driver. [This patch is for 3.2 Kernel. 3.1/0 Kernels need a different patch] CC: Stable Tree Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 3 ++- fs/nfs/pnfs.c | 12 ++++++++++++ fs/nfs/pnfs.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index c807ab93140..55d01280a60 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -551,7 +551,8 @@ static const struct nfs_pageio_ops objio_pg_write_ops = { static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", - .flags = PNFS_LAYOUTRET_ON_SETATTR, + .flags = PNFS_LAYOUTRET_ON_SETATTR | + PNFS_LAYOUTRET_ON_ERROR, .alloc_layout_hdr = objlayout_alloc_layout_hdr, .free_layout_hdr = objlayout_free_layout_hdr, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8e672a2b2d6..f881a638794 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1178,6 +1178,15 @@ void pnfs_ld_write_done(struct nfs_write_data *data) put_lseg(data->lseg); data->lseg = NULL; dprintk("pnfs write error = %d\n", data->pnfs_error); + if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + PNFS_LAYOUTRET_ON_ERROR) { + /* Don't lo_commit on error, Server will needs to + * preform a file recovery. + */ + clear_bit(NFS_INO_LAYOUTCOMMIT, + &NFS_I(data->inode)->flags); + pnfs_return_layout(data->inode); + } } data->mds_ops->rpc_release(data); } @@ -1267,6 +1276,9 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) put_lseg(data->lseg); data->lseg = NULL; dprintk("pnfs write error = %d\n", data->pnfs_error); + if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + PNFS_LAYOUTRET_ON_ERROR) + pnfs_return_layout(data->inode); nfs_pageio_init_read_mds(&pgio, data->inode); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1509530cb11..53d593a0a4f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -68,6 +68,7 @@ enum { enum layoutdriver_policy_flags { /* Should the pNFS client commit and return the layout upon a setattr */ PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, + PNFS_LAYOUTRET_ON_ERROR = 1 << 1, }; struct nfs4_deviceid_node; -- cgit v1.2.3 From e2fecb215b321db0e4a5b2597349a63c07bec42f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Jan 2012 08:57:46 -0500 Subject: NFS: Remove pNFS bloat from the generic write path We have no business doing any this in the standard write release path. Get rid of it, and put it in the pNFS layer. Also, while we're at it, get rid of the completely bogus unlock/relock semantics that were present in nfs_writeback_release_full(). It is not only unnecessary, but actually dangerous to release the write lock just in order to take it again in nfs_page_async_flush(). Better just to open code the pgio operations in a pnfs helper. 
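The resend helper introduced below has a familiar shape: feed each request to the fallback path, collect the ones that could not be queued on a local list, and splice that list back so the caller still sees the failure. Here is a user-space paraphrase of that shape; struct req, try_queue() and the every-third-request failure rule are invented for illustration.

/* Collect-failures-and-splice-back idiom, reduced to a toy list. */
#include <stdio.h>

struct req {
        struct req *next;
        int id;
};

static int try_queue(struct req *r)
{
        return r->id % 3 != 0;          /* pretend every third request fails */
}

/* Returns 0 if everything was queued, -1 if *head now holds the failures. */
static int resend_all(struct req **head)
{
        struct req *failed = NULL, *r;

        while ((r = *head) != NULL) {
                *head = r->next;
                if (!try_queue(r)) {
                        r->next = failed;       /* keep it for the caller */
                        failed = r;
                }
        }
        *head = failed;
        return failed ? -1 : 0;
}

int main(void)
{
        struct req a = { NULL, 3 }, b = { &a, 2 }, c = { &b, 1 };
        struct req *head = &c;

        printf("resend_all = %d\n", resend_all(&head)); /* -1, request 3 left */
        return 0;
}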
Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 ++ fs/nfs/pnfs.c | 30 ++++++++++++++++++++++++++++-- fs/nfs/write.c | 27 ++------------------------- 3 files changed, 32 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3f4d95751d5..5ee92538b06 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -307,6 +307,8 @@ extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head); +extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_write_data *p); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f881a638794..17149a49006 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1166,6 +1166,33 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); +static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) +{ + struct nfs_pageio_descriptor pgio; + LIST_HEAD(failed); + + /* Resend all requests through the MDS */ + nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); + while (!list_empty(head)) { + struct nfs_page *req = nfs_list_entry(head->next); + + nfs_list_remove_request(req); + if (!nfs_pageio_add_request(&pgio, req)) + nfs_list_add_request(req, &failed); + } + nfs_pageio_complete(&pgio); + + if (!list_empty(&failed)) { + /* For some reason our attempt to resend pages. Mark the + * overall send request as having failed, and let + * nfs_writeback_release_full deal with the error. + */ + list_move(&failed, head); + return -EIO; + } + return 0; +} + /* * Called by non rpc-based layout drivers */ @@ -1175,8 +1202,6 @@ void pnfs_ld_write_done(struct nfs_write_data *data) pnfs_set_layoutcommit(data); data->mds_ops->rpc_call_done(&data->task, data); } else { - put_lseg(data->lseg); - data->lseg = NULL; dprintk("pnfs write error = %d\n", data->pnfs_error); if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { @@ -1187,6 +1212,7 @@ void pnfs_ld_write_done(struct nfs_write_data *data) &NFS_I(data->inode)->flags); pnfs_return_layout(data->inode); } + data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); } data->mds_ops->rpc_release(data); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1dda78db6a7..0c3885255f9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1052,7 +1052,7 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_doio = nfs_generic_pg_writepages, }; -static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, +void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, @@ -1166,13 +1166,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) static void nfs_writeback_release_full(void *calldata) { struct nfs_write_data *data = calldata; - int ret, status = data->task.tk_status; - struct nfs_pageio_descriptor pgio; - - if (data->pnfs_error) { - nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE); - pgio.pg_recoalesce = 1; - } + int status = data->task.tk_status; /* Update attributes as result of writeback. 
*/ while (!list_empty(&data->pages)) { @@ -1188,11 +1182,6 @@ static void nfs_writeback_release_full(void *calldata) req->wb_bytes, (long long)req_offset(req)); - if (data->pnfs_error) { - dprintk(", pnfs error = %d\n", data->pnfs_error); - goto next; - } - if (status < 0) { nfs_set_pageerror(page); nfs_context_set_write_error(req->wb_context, status); @@ -1212,19 +1201,7 @@ remove_request: next: nfs_clear_page_tag_locked(req); nfs_end_page_writeback(page); - if (data->pnfs_error) { - lock_page(page); - nfs_pageio_cond_complete(&pgio, page->index); - ret = nfs_page_async_flush(&pgio, page, 0); - if (ret) { - nfs_set_pageerror(page); - dprintk("rewrite to MDS error = %d\n", ret); - } - unlock_page(page); - } } - if (data->pnfs_error) - nfs_pageio_complete(&pgio); nfs_writedata_release(calldata); } -- cgit v1.2.3 From 831c2dc5f47c1dc79c32229d75065ada1dcc66e1 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 29 Nov 2011 15:35:53 -0800 Subject: ore: FIX breakage when MISC_FILESYSTEMS is not set As Reported by Randy Dunlap When MISC_FILESYSTEMS is not enabled and NFS4.1 is: fs/built-in.o: In function `objio_alloc_io_state': objio_osd.c:(.text+0xcb525): undefined reference to `ore_get_rw_state' fs/built-in.o: In function `_write_done': objio_osd.c:(.text+0xcb58d): undefined reference to `ore_check_io' fs/built-in.o: In function `_read_done': ... When MISC_FILESYSTEMS, which is more of a GUI thing then anything else, is not selected. exofs/Kconfig is never examined during Kconfig, and it can not do it's magic stuff to automatically select everything needed. We must split exofs/Kconfig in two. The ore one is always included. And the exofs one is left in it's old place in the menu. [Needed for the 3.2.0 Kernel] CC: Stable Tree Reported-by: Randy Dunlap Signed-off-by: Boaz Harrosh --- fs/Kconfig | 2 ++ fs/exofs/Kconfig | 11 ----------- fs/exofs/Kconfig.ore | 12 ++++++++++++ 3 files changed, 14 insertions(+), 11 deletions(-) create mode 100644 fs/exofs/Kconfig.ore (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index 5f4c45d4aa1..6ad58a59cf5 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -218,6 +218,8 @@ source "fs/exofs/Kconfig" endif # MISC_FILESYSTEMS +source "fs/exofs/Kconfig.ore" + menuconfig NETWORK_FILESYSTEMS bool "Network File Systems" default y diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig index da42f32c49b..86194b2f799 100644 --- a/fs/exofs/Kconfig +++ b/fs/exofs/Kconfig @@ -1,14 +1,3 @@ -# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects -# for every ORE user we do it like this. Any user should add itself here -# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are -# selected here, and we default to "ON". So in effect it is like been -# selected by any of the users. -config ORE - tristate - depends on EXOFS_FS || PNFS_OBJLAYOUT - select ASYNC_XOR - default SCSI_OSD_ULD - config EXOFS_FS tristate "exofs: OSD based file system support" depends on SCSI_OSD_ULD diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore new file mode 100644 index 00000000000..1ca7fb7b6ba --- /dev/null +++ b/fs/exofs/Kconfig.ore @@ -0,0 +1,12 @@ +# ORE - Objects Raid Engine (libore.ko) +# +# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects +# for every ORE user we do it like this. Any user should add itself here +# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are +# selected here, and we default to "ON". So in effect it is like been +# selected by any of the users. 
+config ORE + tristate + depends on EXOFS_FS || PNFS_OBJLAYOUT + select ASYNC_XOR + default SCSI_OSD_ULD -- cgit v1.2.3 From ffefb8eaa367e8a5c14f779233d9da1fbc23d164 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 27 Dec 2011 19:23:36 +0200 Subject: ore: Fix crash in case of an IO error. The users of ore_check_io() expect the reported device (In case of error) to be indexed relative to the passed-in ore_components table, and not the logical dev index. This causes a crash inside objlayoutdriver in case of an IO error. [Bug in 3.2.0 Kernel] CC: Stable Tree Signed-off-by: Boaz Harrosh --- fs/exofs/ore.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index d271ad83720..894f3e192e6 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error) u64 residual = ios->reading ? or->in.residual : or->out.residual; u64 offset = (ios->offset + ios->length) - residual; - struct ore_dev *od = ios->oc->ods[ - per_dev->dev - ios->oc->first_dev]; + unsigned dev = per_dev->dev - ios->oc->first_dev; + struct ore_dev *od = ios->oc->ods[dev]; - on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri, + on_dev_error(ios, od, dev, osi.osd_err_pri, offset, residual); } if (osi.osd_err_pri >= acumulated_osd_err) { -- cgit v1.2.3 From 361aba569f55dd159b850489a3538253afbb3973 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 28 Dec 2011 19:14:23 +0200 Subject: ore: fix BUG_ON, too few sgs when reading When reading RAID5 files, in rare cases, we calculated too few sg segments. There should be two extra for the beginning and end partial units. Also "too few sg segments" should not be a BUG_ON there is all the mechanics in place to handle it, as a short read. So just return -ENOMEM and the rest of the code will gracefully split the IO. [Bug in 3.2.0 Kernel] CC: Stable Tree Signed-off-by: Boaz Harrosh --- fs/exofs/ore.c | 2 +- fs/exofs/ore_raid.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 894f3e192e6..49cf230554a 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -266,7 +266,7 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, /* first/last seg is split */ num_raid_units += layout->group_width; - sgs_per_dev = div_u64(num_raid_units, data_devs); + sgs_per_dev = div_u64(num_raid_units, data_devs) + 2; } else { /* For Writes add parity pages array. */ max_par_pages = num_raid_units * pages_in_unit * diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 29c47e5c4a8..414a2dfd950 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -551,7 +551,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios, unsigned cur_len) { if (ios->reading) { - BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev); + if (per_dev->cur_sg >= ios->sgs_per_dev) { + ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" , + per_dev->cur_sg, ios->sgs_per_dev); + return -ENOMEM; + } _ore_add_sg_seg(per_dev, cur_len, true); } else { struct __stripe_pages_2d *sp2d = ios->sp2d; -- cgit v1.2.3 From f766619db2be059cd0dbba8a36176fe01a29d588 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 18 Dec 2011 23:03:13 +0530 Subject: fs/9p: iattr_valid flags are kernel internal flags map them to 9p values. Kernel internal values can change, add protocol values for these constant and use them. 
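The change below introduces a small translation table so that kernel-internal ATTR_* bit values never leak onto the wire: internal constants may be renumbered at will, protocol bits may not. The same table-driven idiom, reduced to a stand-alone example; the LOCAL_* and WIRE_* constants are made up.

/* Table-driven flag translation; only the idiom matches the patch below. */
#include <stdio.h>

#define LOCAL_MODE (1 << 0)             /* internal values, free to change */
#define LOCAL_UID  (1 << 1)
#define LOCAL_SIZE (1 << 5)

#define WIRE_MODE  (1 << 0)             /* protocol values, fixed forever */
#define WIRE_UID   (1 << 1)
#define WIRE_SIZE  (1 << 3)

static const struct {
        int local;
        int wire;
} flag_map[] = {
        { LOCAL_MODE, WIRE_MODE },
        { LOCAL_UID,  WIRE_UID  },
        { LOCAL_SIZE, WIRE_SIZE },
};

static int map_to_wire(int local)
{
        unsigned int i;
        int wire = 0;

        for (i = 0; i < sizeof(flag_map) / sizeof(flag_map[0]); i++)
                if (local & flag_map[i].local)
                        wire |= flag_map[i].wire;
        return wire;
}

int main(void)
{
        printf("0x%x\n", map_to_wire(LOCAL_MODE | LOCAL_SIZE));        /* 0x9 */
        return 0;
}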
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode_dotl.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 73488fb69d3..7ca7171273d 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -522,6 +522,46 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, return 0; } +/* + * Attribute flags. + */ +#define P9_ATTR_MODE (1 << 0) +#define P9_ATTR_UID (1 << 1) +#define P9_ATTR_GID (1 << 2) +#define P9_ATTR_SIZE (1 << 3) +#define P9_ATTR_ATIME (1 << 4) +#define P9_ATTR_MTIME (1 << 5) +#define P9_ATTR_CTIME (1 << 6) +#define P9_ATTR_ATIME_SET (1 << 7) +#define P9_ATTR_MTIME_SET (1 << 8) + +struct dotl_iattr_map { + int iattr_valid; + int p9_iattr_valid; +}; + +static int v9fs_mapped_iattr_valid(int iattr_valid) +{ + int i; + int p9_iattr_valid = 0; + struct dotl_iattr_map dotl_iattr_map[] = { + { ATTR_MODE, P9_ATTR_MODE }, + { ATTR_UID, P9_ATTR_UID }, + { ATTR_GID, P9_ATTR_GID }, + { ATTR_SIZE, P9_ATTR_SIZE }, + { ATTR_ATIME, P9_ATTR_ATIME }, + { ATTR_MTIME, P9_ATTR_MTIME }, + { ATTR_CTIME, P9_ATTR_CTIME }, + { ATTR_ATIME_SET, P9_ATTR_ATIME_SET }, + { ATTR_MTIME_SET, P9_ATTR_MTIME_SET }, + }; + for (i = 0; i < ARRAY_SIZE(dotl_iattr_map); i++) { + if (iattr_valid & dotl_iattr_map[i].iattr_valid) + p9_iattr_valid |= dotl_iattr_map[i].p9_iattr_valid; + } + return p9_iattr_valid; +} + /** * v9fs_vfs_setattr_dotl - set file metadata * @dentry: file whose metadata to set @@ -542,7 +582,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) if (retval) return retval; - p9attr.valid = iattr->ia_valid; + p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid); p9attr.mode = iattr->ia_mode; p9attr.uid = iattr->ia_uid; p9attr.gid = iattr->ia_gid; -- cgit v1.2.3 From 203bf287cb01a5dc26c20bd3737cecf3aeba1d48 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 6 Jan 2012 15:23:57 -0500 Subject: Btrfs: run chunk allocations while we do delayed refs Btrfs tries to batch extent allocation tree changes to improve performance and reduce metadata trashing. But it doesn't allocate new metadata chunks while it is doing allocations for the extent allocation tree. This commit changes the delayed refence code to do chunk allocations if we're getting low on room. It prevents crashes and improves performance. 
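The fix below is more a policy change than a new algorithm: top up the metadata chunk pool before and during the delayed-ref batch instead of letting an allocation fail half way through it. A toy model of that allocate-ahead loop follows; the pool size, refill amount and low-water mark are invented numbers.

/* Toy model of allocate-ahead with a "no force" refill. */
#include <stdio.h>

static unsigned long free_units = 4;

static void topup_no_force(void)
{
        if (free_units >= 8)            /* "no force": refill only when low */
                return;
        free_units += 16;
        printf("topped up, free = %lu\n", free_units);
}

int main(void)
{
        int i;

        topup_no_force();               /* up front, before the batch */
        for (i = 0; i < 40; i++) {
                free_units--;           /* each delayed ref consumes space */
                topup_no_force();       /* and may trigger another refill */
        }
        printf("done, free = %lu\n", free_units);
        return 0;
}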
Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 14 ++++++++++---- fs/btrfs/transaction.c | 9 +-------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f5fbe576d2b..71549d11a09 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2267,9 +2267,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, BUG_ON(ret); kfree(extent_op); - cond_resched(); - spin_lock(&delayed_refs->lock); - continue; + goto next; } list_del_init(&locked_ref->cluster); @@ -2289,7 +2287,11 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, btrfs_put_delayed_ref(ref); kfree(extent_op); count++; - +next: + do_chunk_alloc(trans, root->fs_info->extent_root, + 2 * 1024 * 1024, + btrfs_get_alloc_profile(root, 0), + CHUNK_ALLOC_NO_FORCE); cond_resched(); spin_lock(&delayed_refs->lock); } @@ -2317,6 +2319,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, if (root == root->fs_info->extent_root) root = root->fs_info->tree_root; + do_chunk_alloc(trans, root->fs_info->extent_root, + 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), + CHUNK_ALLOC_NO_FORCE); + delayed_refs = &trans->transaction->delayed_refs; INIT_LIST_HEAD(&cluster); again: diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 81376d94cd3..360c2dfd1ee 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -467,19 +467,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; - while (count < 4) { + while (count < 2) { unsigned long cur = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; if (cur && trans->transaction->delayed_refs.num_heads_ready > 64) { trans->delayed_ref_updates = 0; - - /* - * do a full flush if the transaction is trying - * to close - */ - if (trans->transaction->delayed_refs.flushing) - cur = 0; btrfs_run_delayed_refs(trans, root, cur); } else { break; -- cgit v1.2.3 From cf1d72c9ceec391d34c48724da57282e97f01122 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 6 Jan 2012 15:41:34 -0500 Subject: Btrfs: lower the bar for chunk allocation The chunk allocation code has tried to keep a pretty tight lid on creating new metadata chunks. This is partially because in the past the reservation code didn't give us an accurate idea of how much space was being used. The new code is much more accurate, so we're able to get rid of some of these checks. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71549d11a09..247d2c94f8e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3263,27 +3263,12 @@ static int should_alloc_chunk(struct btrfs_root *root, if (num_bytes - num_allocated < thresh) return 1; } - - /* - * we have two similar checks here, one based on percentage - * and once based on a hard number of 256MB. The idea - * is that if we have a good amount of free - * room, don't allocate a chunk. 
A good mount is - * less than 80% utilized of the chunks we have allocated, - * or more than 256MB free - */ - if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes) - return 0; - - if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) - return 0; - thresh = btrfs_super_total_bytes(root->fs_info->super_copy); - /* 256MB or 5% of the FS */ - thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); + /* 256MB or 2% of the FS */ + thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2)); - if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) + if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8)) return 0; return 1; } -- cgit v1.2.3 From 1100373f8aa69e377386499350496e3d8565605f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 6 Jan 2012 15:47:38 -0500 Subject: Btrfs: use bigger metadata chunks on bigger filesystems The 256MB chunk is a little small on a huge FS. This scales up the chunk size. Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f4b839fd3c9..ac00e3aa80a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2441,7 +2441,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, max_stripe_size = 1024 * 1024 * 1024; max_chunk_size = 10 * max_stripe_size; } else if (type & BTRFS_BLOCK_GROUP_METADATA) { - max_stripe_size = 256 * 1024 * 1024; + /* for larger filesystems, use larger metadata chunks */ + if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024) + max_stripe_size = 1024 * 1024 * 1024; + else + max_stripe_size = 256 * 1024 * 1024; max_chunk_size = max_stripe_size; } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { max_stripe_size = 8 * 1024 * 1024; -- cgit v1.2.3 From a5f6f719a5cd7caeee8ed8137cf3f94c3bbebc65 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Mon, 12 Dec 2011 04:48:19 -0200 Subject: Btrfs: test free space only for unclustered allocation Since the clustered allocation may be taking extents from a different block group, there's no point in spin-locking and testing the current block group free space before attempting to allocate space from a cluster, even more so when we might refrain from even trying the cluster in the current block group because, after the cluster was set up, not enough free space remained. Furthermore, cluster creation attempts fail fast when the block group doesn't have enough free space, so the test was completely superfluous. I've move the free space test past the cluster allocation attempt, where it is more useful, and arranged for a cluster in the current block group to be released before trying an unclustered allocation, when we reach the LOOP_NO_EMPTY_SIZE stage, so that the free space in the cluster stands a chance of being combined with additional free space in the block group so as to succeed in the allocation attempt. 
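To put numbers on the relaxed should_alloc_chunk() test in the hunk above (256MB or 2% of the filesystem, plus the 80% usage check): on a hypothetical 1 TiB filesystem the threshold works out to roughly 20.5 GiB, so a 30 GiB metadata pool only gets another chunk once more than about 24 GiB of it is in use. The same arithmetic as a small program; the 1 TiB and 30 GiB figures are invented.

/* Back-of-the-envelope version of the relaxed test. */
#include <stdio.h>

int main(void)
{
        unsigned long long fs_bytes = 1024ULL << 30;    /* 1 TiB filesystem */
        unsigned long long pool = 30ULL << 30;          /* metadata space so far */
        unsigned long long thresh = fs_bytes * 2 / 100; /* div_factor_fine(x, 2) */

        if (thresh < 256ULL << 20)
                thresh = 256ULL << 20;                  /* 256MB floor */
        printf("threshold   = %llu MiB\n", thresh >> 20);
        printf("80%% of pool = %llu MiB\n", pool * 8 / 10 >> 20);
        /* A new chunk is considered only once usage crosses that 80% line. */
        return 0;
}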
Signed-off-by: Alexandre Oliva Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 247d2c94f8e..5ea3acc5324 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5286,15 +5286,6 @@ alloc: if (unlikely(block_group->ro)) goto loop; - spin_lock(&block_group->free_space_ctl->tree_lock); - if (cached && - block_group->free_space_ctl->free_space < - num_bytes + empty_cluster + empty_size) { - spin_unlock(&block_group->free_space_ctl->tree_lock); - goto loop; - } - spin_unlock(&block_group->free_space_ctl->tree_lock); - /* * Ok we want to try and use the cluster allocator, so * lets look there @@ -5340,8 +5331,15 @@ refill_cluster: * plenty of times and not have found * anything, so we are likely way too * fragmented for the clustering stuff to find - * anything. */ - if (loop >= LOOP_NO_EMPTY_SIZE) { + * anything. + * + * However, if the cluster is taken from the + * current block group, release the cluster + * first, so that we stand a better chance of + * succeeding in the unclustered + * allocation. */ + if (loop >= LOOP_NO_EMPTY_SIZE && + last_ptr->block_group != block_group) { spin_unlock(&last_ptr->refill_lock); goto unclustered_alloc; } @@ -5352,6 +5350,11 @@ refill_cluster: */ btrfs_return_cluster_to_free_space(NULL, last_ptr); + if (loop >= LOOP_NO_EMPTY_SIZE) { + spin_unlock(&last_ptr->refill_lock); + goto unclustered_alloc; + } + /* allocate a cluster in this block group */ ret = btrfs_find_space_cluster(trans, root, block_group, last_ptr, @@ -5392,6 +5395,15 @@ refill_cluster: } unclustered_alloc: + spin_lock(&block_group->free_space_ctl->tree_lock); + if (cached && + block_group->free_space_ctl->free_space < + num_bytes + empty_cluster + empty_size) { + spin_unlock(&block_group->free_space_ctl->tree_lock); + goto loop; + } + spin_unlock(&block_group->free_space_ctl->tree_lock); + offset = btrfs_find_space_for_alloc(block_group, search_start, num_bytes, empty_size); /* -- cgit v1.2.3 From 6926afd1925a54a13684ebe05987868890665e2b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 7 Jan 2012 13:22:46 -0500 Subject: NFSv4: Save the owner/group name string when doing open ...so that we can do the uid/gid mapping outside the asynchronous RPC context. This fixes a bug in the current NFSv4 atomic open code where the client isn't able to determine what the true uid/gid fields of the file are, (because the asynchronous nature of the OPEN call denies it the ability to do an upcall) and so fills them with default values, marking the inode as needing revalidation. Unfortunately, in some cases, the VFS will do some additional sanity checks on the file, and may override the server's decision to allow the open because it sees the wrong owner/group fields. 
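The two-phase approach described above has a direct user-space analogue: the decode step must not block, so it only copies the owner string, and the numeric mapping happens later, in a context where a blocking lookup (getpwnam(3) here, standing in for the idmapper upcall) is allowed. The struct and function names below are invented for illustration.

/* Deferred name-to-id mapping: copy now, resolve later. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <pwd.h>

struct fattr {
        char *owner_name;               /* filled by the non-blocking phase */
        uid_t uid;
        int uid_valid;
};

static void decode_owner(struct fattr *f, const char *wire_name)
{
        f->owner_name = strdup(wire_name);      /* no lookups allowed here */
        f->uid_valid = 0;
}

static void map_and_free_owner(struct fattr *f)
{
        struct passwd *pw;

        pw = f->owner_name ? getpwnam(f->owner_name) : NULL;    /* may block */
        if (pw) {
                f->uid = pw->pw_uid;
                f->uid_valid = 1;
        }
        free(f->owner_name);
        f->owner_name = NULL;
}

int main(void)
{
        struct fattr f;

        decode_owner(&f, "root");
        map_and_free_owner(&f);
        if (f.uid_valid)
                printf("uid = %u\n", (unsigned)f.uid);
        return 0;
}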
Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 83 ++++++++++++++++++++++++++++++++++++ fs/nfs/inode.c | 2 + fs/nfs/nfs4proc.c | 10 +++++ fs/nfs/nfs4xdr.c | 106 ++++++++++++++++++++-------------------------- include/linux/nfs_idmap.h | 8 ++++ include/linux/nfs_xdr.h | 17 +++++--- 6 files changed, 162 insertions(+), 64 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 47d1c6ff2d8..2c05f1991e1 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -38,6 +38,89 @@ #include #include #include +#include + +/** + * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields + * @fattr: fully initialised struct nfs_fattr + * @owner_name: owner name string cache + * @group_name: group name string cache + */ +void nfs_fattr_init_names(struct nfs_fattr *fattr, + struct nfs4_string *owner_name, + struct nfs4_string *group_name) +{ + fattr->owner_name = owner_name; + fattr->group_name = group_name; +} + +static void nfs_fattr_free_owner_name(struct nfs_fattr *fattr) +{ + fattr->valid &= ~NFS_ATTR_FATTR_OWNER_NAME; + kfree(fattr->owner_name->data); +} + +static void nfs_fattr_free_group_name(struct nfs_fattr *fattr) +{ + fattr->valid &= ~NFS_ATTR_FATTR_GROUP_NAME; + kfree(fattr->group_name->data); +} + +static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr) +{ + struct nfs4_string *owner = fattr->owner_name; + __u32 uid; + + if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)) + return false; + if (nfs_map_name_to_uid(server, owner->data, owner->len, &uid) == 0) { + fattr->uid = uid; + fattr->valid |= NFS_ATTR_FATTR_OWNER; + } + return true; +} + +static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr) +{ + struct nfs4_string *group = fattr->group_name; + __u32 gid; + + if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)) + return false; + if (nfs_map_group_to_gid(server, group->data, group->len, &gid) == 0) { + fattr->gid = gid; + fattr->valid |= NFS_ATTR_FATTR_GROUP; + } + return true; +} + +/** + * nfs_fattr_free_names - free up the NFSv4 owner and group strings + * @fattr: a fully initialised nfs_fattr structure + */ +void nfs_fattr_free_names(struct nfs_fattr *fattr) +{ + if (fattr->valid & NFS_ATTR_FATTR_OWNER_NAME) + nfs_fattr_free_owner_name(fattr); + if (fattr->valid & NFS_ATTR_FATTR_GROUP_NAME) + nfs_fattr_free_group_name(fattr); +} + +/** + * nfs_fattr_map_and_free_names - map owner/group strings into uid/gid and free + * @server: pointer to the filesystem nfs_server structure + * @fattr: a fully initialised nfs_fattr structure + * + * This helper maps the cached NFSv4 owner/group strings in fattr into + * their numeric uid/gid equivalents, and then frees the cached strings. 
+ */ +void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *fattr) +{ + if (nfs_fattr_map_owner_name(server, fattr)) + nfs_fattr_free_owner_name(fattr); + if (nfs_fattr_map_group_name(server, fattr)) + nfs_fattr_free_group_name(fattr); +} static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 50a15fa8cf9..f59cab12a8e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1019,6 +1019,8 @@ void nfs_fattr_init(struct nfs_fattr *fattr) fattr->valid = 0; fattr->time_start = jiffies; fattr->gencount = nfs_inc_attr_generation_counter(); + fattr->owner_name = NULL; + fattr->group_name = NULL; } struct nfs_fattr *nfs_alloc_fattr(void) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3b108011845..df3d3068242 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -760,6 +761,8 @@ struct nfs4_opendata { struct nfs_openres o_res; struct nfs_open_confirmargs c_arg; struct nfs_open_confirmres c_res; + struct nfs4_string owner_name; + struct nfs4_string group_name; struct nfs_fattr f_attr; struct nfs_fattr dir_attr; struct dentry *dir; @@ -783,6 +786,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) p->o_res.server = p->o_arg.server; nfs_fattr_init(&p->f_attr); nfs_fattr_init(&p->dir_attr); + nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); } static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, @@ -814,6 +818,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; + p->o_arg.dir_bitmask = server->cache_consistency_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; if (flags & O_CREAT) { u32 *s; @@ -850,6 +855,7 @@ static void nfs4_opendata_free(struct kref *kref) dput(p->dir); dput(p->dentry); nfs_sb_deactive(sb); + nfs_fattr_free_names(&p->f_attr); kfree(p); } @@ -1574,6 +1580,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) if (status != 0 || !data->rpc_done) return status; + nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); + nfs_refresh_inode(dir, o_res->dir_attr); if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { @@ -1606,6 +1614,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return status; } + nfs_fattr_map_and_free_names(server, &data->f_attr); + if (o_arg->open_flags & O_CREAT) { update_changeattr(dir, &o_res->cinfo); nfs_post_op_update_inode(dir, o_res->dir_attr); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index dcaf69309d8..95e92e43840 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2298,7 +2298,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_getfh(xdr, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_restorefh(xdr, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + encode_getfattr(xdr, args->dir_bitmask, &hdr); encode_nops(&hdr); } @@ -3792,7 +3792,8 @@ out_overflow: } static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, - const struct nfs_server *server, uint32_t *uid, int may_sleep) + const struct nfs_server *server, uint32_t *uid, + struct nfs4_string *owner_name) { uint32_t len; __be32 *p; @@ -3809,8 +3810,12 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, p = xdr_inline_decode(xdr, len); if (unlikely(!p)) goto out_overflow; - if (!may_sleep) { - /* do nothing */ + if (owner_name 
!= NULL) { + owner_name->data = kmemdup(p, len, GFP_NOWAIT); + if (owner_name->data != NULL) { + owner_name->len = len; + ret = NFS_ATTR_FATTR_OWNER_NAME; + } } else if (len < XDR_MAX_NETOBJ) { if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0) ret = NFS_ATTR_FATTR_OWNER; @@ -3830,7 +3835,8 @@ out_overflow: } static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, - const struct nfs_server *server, uint32_t *gid, int may_sleep) + const struct nfs_server *server, uint32_t *gid, + struct nfs4_string *group_name) { uint32_t len; __be32 *p; @@ -3847,8 +3853,12 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, p = xdr_inline_decode(xdr, len); if (unlikely(!p)) goto out_overflow; - if (!may_sleep) { - /* do nothing */ + if (group_name != NULL) { + group_name->data = kmemdup(p, len, GFP_NOWAIT); + if (group_name->data != NULL) { + group_name->len = len; + ret = NFS_ATTR_FATTR_GROUP_NAME; + } } else if (len < XDR_MAX_NETOBJ) { if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0) ret = NFS_ATTR_FATTR_GROUP; @@ -4285,7 +4295,7 @@ xdr_error: static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fattr *fattr, struct nfs_fh *fh, - const struct nfs_server *server, int may_sleep) + const struct nfs_server *server) { int status; umode_t fmode = 0; @@ -4352,12 +4362,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; - status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep); + status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, fattr->owner_name); if (status < 0) goto xdr_error; fattr->valid |= status; - status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep); + status = decode_attr_group(xdr, bitmap, server, &fattr->gid, fattr->group_name); if (status < 0) goto xdr_error; fattr->valid |= status; @@ -4398,7 +4408,7 @@ xdr_error: } static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, - struct nfs_fh *fh, const struct nfs_server *server, int may_sleep) + struct nfs_fh *fh, const struct nfs_server *server) { __be32 *savep; uint32_t attrlen, @@ -4417,7 +4427,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat if (status < 0) goto xdr_error; - status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep); + status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server); if (status < 0) goto xdr_error; @@ -4428,9 +4438,9 @@ xdr_error: } static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, - const struct nfs_server *server, int may_sleep) + const struct nfs_server *server) { - return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep); + return decode_getfattr_generic(xdr, fattr, NULL, server); } /* @@ -5711,8 +5721,7 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, status = decode_open_downgrade(xdr, res); if (status != 0) goto out; - decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -5738,8 +5747,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_access(xdr, res); if (status != 0) goto out; - decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -5768,8 +5776,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr, 
status = decode_getfh(xdr, res->fh); if (status) goto out; - status = decode_getfattr(xdr, res->fattr, res->server - ,!RPC_IS_ASYNC(rqstp->rq_task)); + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -5795,8 +5802,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, goto out; status = decode_getfh(xdr, res->fh); if (status == 0) - status = decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -5822,8 +5828,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_remove(xdr, &res->cinfo); if (status) goto out; - decode_getfattr(xdr, res->dir_attr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->dir_attr, res->server); out: return status; } @@ -5856,14 +5861,12 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; /* Current FH is target directory */ - if (decode_getfattr(xdr, res->new_fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + if (decode_getfattr(xdr, res->new_fattr, res->server)) goto out; status = decode_restorefh(xdr); if (status) goto out; - decode_getfattr(xdr, res->old_fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->old_fattr, res->server); out: return status; } @@ -5899,14 +5902,12 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr, * Note order: OP_LINK leaves the directory as the current * filehandle. */ - if (decode_getfattr(xdr, res->dir_attr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + if (decode_getfattr(xdr, res->dir_attr, res->server)) goto out; status = decode_restorefh(xdr); if (status) goto out; - decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -5938,14 +5939,12 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if (status) goto out; - if (decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + if (decode_getfattr(xdr, res->fattr, res->server)) goto out; status = decode_restorefh(xdr); if (status) goto out; - decode_getfattr(xdr, res->dir_fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->dir_fattr, res->server); out: return status; } @@ -5977,8 +5976,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6076,8 +6074,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr, * an ESTALE error. Shouldn't be a problem, * though, since fattr->valid will remain unset. 
*/ - decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6108,13 +6105,11 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, goto out; if (decode_getfh(xdr, &res->fh) != 0) goto out; - if (decode_getfattr(xdr, res->f_attr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + if (decode_getfattr(xdr, res->f_attr, res->server) != 0) goto out; if (decode_restorefh(xdr) != 0) goto out; - decode_getfattr(xdr, res->dir_attr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->dir_attr, res->server); out: return status; } @@ -6162,8 +6157,7 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, status = decode_open(xdr, res); if (status) goto out; - decode_getfattr(xdr, res->f_attr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->f_attr, res->server); out: return status; } @@ -6190,8 +6184,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, status = decode_setattr(xdr); if (status) goto out; - decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6371,8 +6364,7 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; if (res->fattr) - decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->fattr, res->server); if (!status) status = res->count; out: @@ -6401,8 +6393,7 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; if (res->fattr) - decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6561,8 +6552,7 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, status = decode_delegreturn(xdr); if (status != 0) goto out; - decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6591,8 +6581,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, goto out; xdr_enter_page(xdr, PAGE_SIZE); status = decode_getfattr(xdr, &res->fs_locations->fattr, - res->fs_locations->server, - !RPC_IS_ASYNC(req->rq_task)); + res->fs_locations->server); out: return status; } @@ -6841,8 +6830,7 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, status = decode_layoutcommit(xdr, rqstp, res); if (status) goto out; - decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6973,7 +6961,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, goto out_overflow; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, - entry->server, 1) < 0) + entry->server) < 0) goto out_overflow; if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) entry->ino = entry->fattr->mounted_on_fileid; diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index ae7d6a380da..308c1887701 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -66,6 +66,8 @@ struct idmap_msg { /* Forward declaration to make this header independent of others */ struct nfs_client; struct nfs_server; +struct nfs_fattr; +struct nfs4_string; #ifdef CONFIG_NFS_USE_NEW_IDMAPPER @@ -97,6 +99,12 @@ void nfs_idmap_delete(struct nfs_client *); #endif /* 
CONFIG_NFS_USE_NEW_IDMAPPER */ +void nfs_fattr_init_names(struct nfs_fattr *fattr, + struct nfs4_string *owner_name, + struct nfs4_string *group_name); +void nfs_fattr_free_names(struct nfs_fattr *); +void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *); + int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, __u32 *); int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, __u32 *); int nfs_map_uid_to_name(const struct nfs_server *, __u32, char *, size_t); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6c898afe609..a764cef06b7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -18,6 +18,11 @@ /* Forward declaration for NFS v3 */ struct nfs4_secinfo_flavors; +struct nfs4_string { + unsigned int len; + char *data; +}; + struct nfs_fsid { uint64_t major; uint64_t minor; @@ -61,6 +66,8 @@ struct nfs_fattr { struct timespec pre_ctime; /* pre_op_attr.ctime */ unsigned long time_start; unsigned long gencount; + struct nfs4_string *owner_name; + struct nfs4_string *group_name; }; #define NFS_ATTR_FATTR_TYPE (1U << 0) @@ -85,6 +92,8 @@ struct nfs_fattr { #define NFS_ATTR_FATTR_V4_REFERRAL (1U << 19) /* NFSv4 referral */ #define NFS_ATTR_FATTR_MOUNTPOINT (1U << 20) /* Treat as mountpoint */ #define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 21) +#define NFS_ATTR_FATTR_OWNER_NAME (1U << 22) +#define NFS_ATTR_FATTR_GROUP_NAME (1U << 23) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ @@ -324,6 +333,7 @@ struct nfs_openargs { const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ const u32 * bitmask; + const u32 * dir_bitmask; __u32 claim; struct nfs4_sequence_args seq_args; }; @@ -342,6 +352,8 @@ struct nfs_openres { __u32 do_recall; __u64 maxsize; __u32 attrset[NFS4_BITMAP_SIZE]; + struct nfs4_string *owner; + struct nfs4_string *group_owner; struct nfs4_sequence_res seq_res; }; @@ -778,11 +790,6 @@ struct nfs3_getaclres { struct posix_acl * acl_default; }; -struct nfs4_string { - unsigned int len; - char *data; -}; - #ifdef CONFIG_NFS_V4 typedef u64 clientid4; -- cgit v1.2.3 From fc7c1077ceb99c35e5f9d0ce03dc7740565bb2bf Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Mon, 28 Nov 2011 12:36:17 -0200 Subject: Btrfs: don't set up allocation result twice We store the allocation start and length twice in ins, once right after the other, but with intervening calls that may prevent the duplicate from being optimized out by the compiler. Remove one of the assignments. Signed-off-by: Alexandre Oliva Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5ea3acc5324..37594e4bf66 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5441,9 +5441,6 @@ checks: goto loop; } - ins->objectid = search_start; - ins->offset = num_bytes; - if (offset < search_start) btrfs_add_free_space(used_block_group, offset, search_start - offset); -- cgit v1.2.3 From 1bb91902dc90e25449893e693ad45605cb08fbe5 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Fri, 14 Oct 2011 12:10:36 -0300 Subject: Btrfs: revamp clustered allocation logic Parameterize clusters on minimum total size, minimum chunk size and minimum contiguous size for at least one chunk, without limits on cluster, window or gap sizes. Don't tolerate any fragmentation for SSD_SPREAD; accept it for metadata, but try to keep data dense. 
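[Editorial aside, not part of the patch: the policy described above reduces to choosing two thresholds per allocation profile. A minimal sketch, assuming the real btrfs option/flag names (btrfs_test_opt, SSD_SPREAD, BTRFS_BLOCK_GROUP_METADATA) and a hypothetical helper name:]

	/* Sketch only: how cont1_bytes (largest required contiguous chunk)
	 * and min_bytes (smallest extent worth adding) could be chosen. */
	static void pick_cluster_thresholds(struct btrfs_root *root, u64 flags,
					    u64 bytes, u64 empty_size,
					    u32 sectorsize,
					    u64 *cont1_bytes, u64 *min_bytes)
	{
		if (btrfs_test_opt(root, SSD_SPREAD)) {
			/* SSD_SPREAD: no fragmentation, one extent covers it all */
			*cont1_bytes = *min_bytes = bytes + empty_size;
		} else if (flags & BTRFS_BLOCK_GROUP_METADATA) {
			/* metadata: small extents are fine, but one must hold 'bytes' */
			*cont1_bytes = bytes;
			*min_bytes = sectorsize;
		} else {
			/* data: keep allocations reasonably dense */
			*cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
			*min_bytes = sectorsize;
		}
	}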
Signed-off-by: Alexandre Oliva Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 112 +++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 63 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index ec23d43d0c3..ce40db59c70 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2283,23 +2283,23 @@ out: static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, struct btrfs_free_space *entry, struct btrfs_free_cluster *cluster, - u64 offset, u64 bytes, u64 min_bytes) + u64 offset, u64 bytes, + u64 cont1_bytes, u64 min_bytes) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; unsigned long next_zero; unsigned long i; - unsigned long search_bits; - unsigned long total_bits; + unsigned long want_bits; + unsigned long min_bits; unsigned long found_bits; unsigned long start = 0; unsigned long total_found = 0; int ret; - bool found = false; i = offset_to_bit(entry->offset, block_group->sectorsize, max_t(u64, offset, entry->offset)); - search_bits = bytes_to_bits(bytes, block_group->sectorsize); - total_bits = bytes_to_bits(min_bytes, block_group->sectorsize); + want_bits = bytes_to_bits(bytes, block_group->sectorsize); + min_bits = bytes_to_bits(min_bytes, block_group->sectorsize); again: found_bits = 0; @@ -2308,7 +2308,7 @@ again: i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { next_zero = find_next_zero_bit(entry->bitmap, BITS_PER_BITMAP, i); - if (next_zero - i >= search_bits) { + if (next_zero - i >= min_bits) { found_bits = next_zero - i; break; } @@ -2318,10 +2318,9 @@ again: if (!found_bits) return -ENOSPC; - if (!found) { + if (!total_found) { start = i; cluster->max_size = 0; - found = true; } total_found += found_bits; @@ -2329,13 +2328,8 @@ again: if (cluster->max_size < found_bits * block_group->sectorsize) cluster->max_size = found_bits * block_group->sectorsize; - if (total_found < total_bits) { - i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero); - if (i - start > total_bits * 2) { - total_found = 0; - cluster->max_size = 0; - found = false; - } + if (total_found < want_bits || cluster->max_size < cont1_bytes) { + i = next_zero + 1; goto again; } @@ -2351,23 +2345,23 @@ again: /* * This searches the block group for just extents to fill the cluster with. + * Try to find a cluster with at least bytes total bytes, at least one + * extent of cont1_bytes, and other clusters of at least min_bytes. */ static noinline int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, struct list_head *bitmaps, u64 offset, u64 bytes, - u64 min_bytes) + u64 cont1_bytes, u64 min_bytes) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *first = NULL; struct btrfs_free_space *entry = NULL; - struct btrfs_free_space *prev = NULL; struct btrfs_free_space *last; struct rb_node *node; u64 window_start; u64 window_free; u64 max_extent; - u64 max_gap = 128 * 1024; entry = tree_search_offset(ctl, offset, 0, 1); if (!entry) @@ -2377,8 +2371,8 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, * We don't want bitmaps, so just move along until we find a normal * extent entry. 
*/ - while (entry->bitmap) { - if (list_empty(&entry->list)) + while (entry->bitmap || entry->bytes < min_bytes) { + if (entry->bitmap && list_empty(&entry->list)) list_add_tail(&entry->list, bitmaps); node = rb_next(&entry->offset_index); if (!node) @@ -2391,12 +2385,9 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, max_extent = entry->bytes; first = entry; last = entry; - prev = entry; - while (window_free <= min_bytes) { - node = rb_next(&entry->offset_index); - if (!node) - return -ENOSPC; + for (node = rb_next(&entry->offset_index); node; + node = rb_next(&entry->offset_index)) { entry = rb_entry(node, struct btrfs_free_space, offset_index); if (entry->bitmap) { @@ -2405,26 +2396,18 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, continue; } - /* - * we haven't filled the empty size and the window is - * very large. reset and try again - */ - if (entry->offset - (prev->offset + prev->bytes) > max_gap || - entry->offset - window_start > (min_bytes * 2)) { - first = entry; - window_start = entry->offset; - window_free = entry->bytes; - last = entry; + if (entry->bytes < min_bytes) + continue; + + last = entry; + window_free += entry->bytes; + if (entry->bytes > max_extent) max_extent = entry->bytes; - } else { - last = entry; - window_free += entry->bytes; - if (entry->bytes > max_extent) - max_extent = entry->bytes; - } - prev = entry; } + if (window_free < bytes || max_extent < cont1_bytes) + return -ENOSPC; + cluster->window_start = first->offset; node = &first->offset_index; @@ -2438,7 +2421,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, entry = rb_entry(node, struct btrfs_free_space, offset_index); node = rb_next(&entry->offset_index); - if (entry->bitmap) + if (entry->bitmap || entry->bytes < min_bytes) continue; rb_erase(&entry->offset_index, &ctl->free_space_offset); @@ -2460,7 +2443,7 @@ static noinline int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, struct list_head *bitmaps, u64 offset, u64 bytes, - u64 min_bytes) + u64 cont1_bytes, u64 min_bytes) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *entry; @@ -2485,7 +2468,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, if (entry->bytes < min_bytes) continue; ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, - bytes, min_bytes); + bytes, cont1_bytes, min_bytes); if (!ret) return 0; } @@ -2499,7 +2482,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, /* * here we try to find a cluster of blocks in a block group. The goal - * is to find at least bytes free and up to empty_size + bytes free. + * is to find at least bytes+empty_size. * We might not find them all in one contiguous area. * * returns zero and sets up cluster if things worked out, otherwise @@ -2515,23 +2498,24 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, struct btrfs_free_space *entry, *tmp; LIST_HEAD(bitmaps); u64 min_bytes; + u64 cont1_bytes; int ret; - /* for metadata, allow allocates with more holes */ + /* + * Choose the minimum extent size we'll require for this + * cluster. For SSD_SPREAD, don't allow any fragmentation. + * For metadata, allow allocates with smaller extents. For + * data, keep it dense. 
+ */ if (btrfs_test_opt(root, SSD_SPREAD)) { - min_bytes = bytes + empty_size; + cont1_bytes = min_bytes = bytes + empty_size; } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { - /* - * we want to do larger allocations when we are - * flushing out the delayed refs, it helps prevent - * making more work as we go along. - */ - if (trans->transaction->delayed_refs.flushing) - min_bytes = max(bytes, (bytes + empty_size) >> 1); - else - min_bytes = max(bytes, (bytes + empty_size) >> 4); - } else - min_bytes = max(bytes, (bytes + empty_size) >> 2); + cont1_bytes = bytes; + min_bytes = block_group->sectorsize; + } else { + cont1_bytes = max(bytes, (bytes + empty_size) >> 2); + min_bytes = block_group->sectorsize; + } spin_lock(&ctl->tree_lock); @@ -2539,7 +2523,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, * If we know we don't have enough space to make a cluster don't even * bother doing all the work to try and find one. */ - if (ctl->free_space < min_bytes) { + if (ctl->free_space < bytes) { spin_unlock(&ctl->tree_lock); return -ENOSPC; } @@ -2553,10 +2537,12 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, } ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, - bytes, min_bytes); + bytes + empty_size, + cont1_bytes, min_bytes); if (ret) ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, - offset, bytes, min_bytes); + offset, bytes + empty_size, + cont1_bytes, min_bytes); /* Clear our temporary list */ list_for_each_entry_safe(entry, tmp, &bitmaps, list) -- cgit v1.2.3 From bc31b86a5923fad5f3fbb6192f767f410241ba27 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sat, 7 Jan 2012 20:41:55 -0600 Subject: writeback: move MIN_WRITEBACK_PAGES to fs-writeback.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix compile error fs/fs-writeback.c:515:33: error: ‘PAGE_CACHE_SHIFT’ undeclared (first use in this function) Reported-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Wu Fengguang --- fs/fs-writeback.c | 6 ++++++ include/linux/writeback.h | 5 ----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 92d353e069d..22e2d42742a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,11 @@ #include #include "internal.h" +/* + * 4MB minimal write chunk size + */ +#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_CACHE_SHIFT - 10)) + /* * Passed into wb_writeback(), essentially a subset of writeback_control */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b30419cd425..4e0a5549302 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -25,11 +25,6 @@ DECLARE_PER_CPU(int, dirty_throttle_leaks); #define DIRTY_SCOPE 8 #define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2) -/* - * 4MB minimal write chunk size - */ -#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_CACHE_SHIFT - 10)) - struct backing_dev_info; /* -- cgit v1.2.3 From 724577ca355795b0a25c93ccbeee927871ca1a77 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 28 Dec 2011 19:21:45 +0200 Subject: ore: Must support none-PAGE-aligned IO NFS might send us offsets that are not PAGE aligned. So we must read in the reminder of the first/last pages, in cases we need it for Parity calculations. We only add an sg segments to read the partial page. But we don't mark it as read=true because it is a lock-for-write page. 
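[Editorial aside, not part of the patch: the partial-page lengths involved are simple remainders of the unaligned offsets. A sketch with hypothetical local names:]

	u64 start = ios_offset;                  /* unaligned I/O start */
	u64 end   = ios_offset + ios_length;     /* unaligned I/O end   */
	unsigned head = start % PAGE_SIZE;       /* bytes to read in before 'start' */
	unsigned tail = (end % PAGE_SIZE) ?
			PAGE_SIZE - (end % PAGE_SIZE) : 0;  /* bytes to read in after 'end' */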
TODO: In some cases (IO spans a single unit) we can just adjust the raid_unit offset/length, but this is left for later Kernels. [Bug in 3.2.0 Kernel] CC: Stable Tree Signed-off-by: Boaz Harrosh --- fs/exofs/ore_raid.c | 72 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 414a2dfd950..d222c77cfa1 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios) /* @si contains info of the to-be-inserted page. Update of @si should be * maintained by caller. Specificaly si->dev, si->obj_offset, ... */ -static int _add_to_read_4_write(struct ore_io_state *ios, - struct ore_striping_info *si, struct page *page) +static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si, + struct page *page, unsigned pg_len) { struct request_queue *q; struct ore_per_dev_state *per_dev; @@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios, _ore_add_sg_seg(per_dev, gap, true); } q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); - added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0); - if (unlikely(added_len != PAGE_SIZE)) { + added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len, + si->obj_offset % PAGE_SIZE); + if (unlikely(added_len != pg_len)) { ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", per_dev->bio->bi_vcnt); return -ENOMEM; } - per_dev->length += PAGE_SIZE; + per_dev->length += pg_len; return 0; } +/* read the beginning of an unaligned first page */ +static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page) +{ + struct ore_striping_info si; + unsigned pg_len; + + ore_calc_stripe_info(ios->layout, ios->offset, 0, &si); + + pg_len = si.obj_offset % PAGE_SIZE; + si.obj_offset -= pg_len; + + ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n", + _LLU(si.obj_offset), pg_len, page->index, si.dev); + + return _add_to_r4w(ios, &si, page, pg_len); +} + +/* read the end of an incomplete last page */ +static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset) +{ + struct ore_striping_info si; + struct page *page; + unsigned pg_len, p, c; + + ore_calc_stripe_info(ios->layout, *offset, 0, &si); + + p = si.unit_off / PAGE_SIZE; + c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, + ios->layout->mirrors_p1, si.par_dev, si.dev); + page = ios->sp2d->_1p_stripes[p].pages[c]; + + pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE); + *offset += pg_len; + + ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n", + p, c, _LLU(*offset), pg_len, si.dev, si.par_dev); + + BUG_ON(!page); + + return _add_to_r4w(ios, &si, page, pg_len); +} + static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) { struct bio_vec *bv; @@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_state *ios) struct page **pp = &_1ps->pages[c]; bool uptodate; - if (*pp) + if (*pp) { + if (ios->offset % PAGE_SIZE) + /* Read the remainder of the page */ + _add_to_r4w_first_page(ios, *pp); /* to-be-written pages start here */ goto read_last_stripe; + } *pp = ios->r4w->get_page(ios->private, offset, &uptodate); @@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_state *ios) return -ENOMEM; if (!uptodate) - _add_to_read_4_write(ios, &read_si, *pp); + _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE); /* Mark read-pages to be cache_released */ _1ps->page_is_read[c] = true; @@ -465,8 
+512,11 @@ static int _read_4_write(struct ore_io_state *ios) } read_last_stripe: - offset = ios->offset + (ios->length + PAGE_SIZE - 1) / - PAGE_SIZE * PAGE_SIZE; + offset = ios->offset + ios->length; + if (offset % PAGE_SIZE) + _add_to_r4w_last_page(ios, &offset); + /* offset will be aligned to next page */ + last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) * bytes_in_stripe; if (offset == last_stripe_end) /* Optimize for the aligned case */ @@ -503,7 +553,7 @@ read_last_stripe: /* Mark read-pages to be cache_released */ _1ps->page_is_read[c] = true; if (!uptodate) - _add_to_read_4_write(ios, &read_si, page); + _add_to_r4w(ios, &read_si, page, PAGE_SIZE); } offset += PAGE_SIZE; @@ -616,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) return -ENOMEM; } - BUG_ON(ios->offset % PAGE_SIZE); - /* Round io down to last full strip */ first_stripe = div_u64(ios->offset, stripe_size); last_stripe = div_u64(ios->offset + ios->length, stripe_size); -- cgit v1.2.3 From 98c7089c769048f941bd5c5285287f8fc301f8b1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 01:00:31 -0500 Subject: btrfs: preparation to fixing mount/umount race We need fs_info and root to live until the moment when the victim superblock leaves the list, so we need to postpone free_fs_info() until after ->put_super(). The call is buried in close_ctree(), though, so we need to lift it into the callers (including btrfs_put_super()) first. Signed-off-by: Al Viro --- fs/btrfs/disk-io.c | 3 +-- fs/btrfs/super.c | 6 +++++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f99a099a774..dcb5d949b54 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2424,6 +2424,7 @@ retry_root_backup: up_read(&fs_info->cleanup_work_sem); if (err) { close_ctree(tree_root); + free_fs_info(fs_info); return ERR_PTR(err); } } @@ -3059,8 +3060,6 @@ int close_ctree(struct btrfs_root *root) bdi_destroy(&fs_info->bdi); cleanup_srcu_struct(&fs_info->subvol_srcu); - free_fs_info(fs_info); - return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ae488aa1966..a3f435e5898 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -151,6 +151,7 @@ static void btrfs_put_super(struct super_block *sb) int ret; ret = close_ctree(root); + free_fs_info(root->fs_info); sb->s_fs_info = NULL; (void)ret; /* FIXME: need to fix VFS to return error? */ @@ -589,6 +590,7 @@ static int btrfs_fill_super(struct super_block *sb, struct inode *inode; struct dentry *root_dentry; struct btrfs_root *tree_root; + struct btrfs_fs_info *fs_info; struct btrfs_key key; int err; @@ -609,12 +611,13 @@ static int btrfs_fill_super(struct super_block *sb, printk("btrfs: open_ctree failed\n"); return PTR_ERR(tree_root); } + fs_info = tree_root->fs_info; sb->s_fs_info = tree_root; key.objectid = BTRFS_FIRST_FREE_OBJECTID; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root, NULL); + inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto fail_close; @@ -635,6 +638,7 @@ static int btrfs_fill_super(struct super_block *sb, fail_close: close_ctree(tree_root); + free_fs_info(fs_info); return err; } -- cgit v1.2.3 From aea52e19dd2085617dd7d247afb76a90cf2ea40c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 01:22:46 -0500 Subject: btrfs: get ->kill_sb() of its own ... and move free_fs_info() to that, out of ->put_super(). 
Do NOT set ->s_fs_info to NULL in the latter; we need it for sget() to be able to see and wait for fs in the middle of umount if we get a mount/umount race. Signed-off-by: Al Viro --- fs/btrfs/super.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a3f435e5898..eca48624a4f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -147,14 +147,13 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, static void btrfs_put_super(struct super_block *sb) { - struct btrfs_root *root = btrfs_sb(sb); - int ret; - - ret = close_ctree(root); - free_fs_info(root->fs_info); - sb->s_fs_info = NULL; - - (void)ret; /* FIXME: need to fix VFS to return error? */ + (void)close_ctree(btrfs_sb(sb)); + /* FIXME: need to fix VFS to return error? */ + /* AV: return it _where_? ->put_super() can be triggered by any number + * of async events, up to and including delivery of SIGKILL to the + * last process that kept it busy. Or segfault in the aforementioned + * process... Whom would you report that to? + */ } enum { @@ -1223,11 +1222,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static void btrfs_kill_super(struct super_block *sb) +{ + struct btrfs_fs_info *fs_info = NULL; + if (sb->s_root) + fs_info = btrfs_sb(sb)->fs_info; + kill_anon_super(sb); + if (fs_info) + free_fs_info(fs_info); +} + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", .mount = btrfs_mount, - .kill_sb = kill_anon_super, + .kill_sb = btrfs_kill_super, .fs_flags = FS_REQUIRES_DEV, }; -- cgit v1.2.3 From 6de1d09d9677dce7a04ef485d426821159ab7de9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 01:29:09 -0500 Subject: btrfs: fix mount/umount race Do *NOT* skip doomed superblocks in btrfs_test_super(); we want sget() to wait for their shutdown to complete. Since we don't mutilate ->s_fs_info in ->put_super() anymore (or free what it used to point to until the superblock is past being findable by sget()), we can just DTRT there and report a match. Signed-off-by: Al Viro --- fs/btrfs/super.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index eca48624a4f..b9fd62a0fca 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -733,20 +733,15 @@ static int btrfs_test_super(struct super_block *s, void *data) struct btrfs_root *test_root = data; struct btrfs_root *root = btrfs_sb(s); - /* - * If this super block is going away, return false as it - * can't match as an existing super block. - */ - if (!atomic_read(&s->s_active)) - return 0; return root->fs_info->fs_devices == test_root->fs_info->fs_devices; } static int btrfs_set_super(struct super_block *s, void *data) { - s->s_fs_info = data; - - return set_anon_super(s, data); + int err = set_anon_super(s, data); + if (!err) + s->s_fs_info = data; + return err; } /* -- cgit v1.2.3 From 10f6327b5d26660e06120ba17cff993cb616b1d2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 15:05:22 -0500 Subject: btrfs: fix a deadlock in btrfs_scan_one_device() pathname resolution under a global mutex, taken on some paths in ->mount() is a Bad Idea(tm) - think what happens if said pathname resolution triggers automount of some btrfs instance and walks into attempt to grab the same mutex. Deadlock - we are waiting for daemon to finish walking the path, daemon is waiting for us to release the mutex... 
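[Editorial aside, not part of the patch: the fix is a lock-ordering change; path lookup now happens before uuid_mutex is taken, so an automount triggered during lookup can no longer block on the mutex we hold. A sketch of the resulting shape, with read_and_register_superblock() as a hypothetical placeholder:]

	static int scan_one_device_sketch(const char *path, fmode_t flags,
					  void *holder)
	{
		struct block_device *bdev;
		int ret;

		/* path resolution may re-enter ->mount(); do it unlocked */
		bdev = blkdev_get_by_path(path, flags | FMODE_EXCL, holder);
		if (IS_ERR(bdev))
			return PTR_ERR(bdev);

		mutex_lock(&uuid_mutex);      /* protect only the device-list update */
		ret = read_and_register_superblock(bdev);   /* hypothetical helper */
		mutex_unlock(&uuid_mutex);

		blkdev_put(bdev, flags | FMODE_EXCL);
		return ret;
	}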
Signed-off-by: Al Viro --- fs/btrfs/volumes.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f4b839fd3c9..96bb3c0a79b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -706,8 +706,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, u64 devid; u64 transid; - mutex_lock(&uuid_mutex); - flags |= FMODE_EXCL; bdev = blkdev_get_by_path(path, flags, holder); @@ -716,6 +714,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, goto error; } + mutex_lock(&uuid_mutex); ret = set_blocksize(bdev, 4096); if (ret) goto error_close; @@ -737,9 +736,9 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, brelse(bh); error_close: + mutex_unlock(&uuid_mutex); blkdev_put(bdev, flags); error: - mutex_unlock(&uuid_mutex); return ret; } -- cgit v1.2.3 From 2eb34cd368b3e5cef960ae9d9971a037cd6fa9d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 00:32:06 -0500 Subject: btrfs: sanitizing ->fs_info, part 1 take assignment of ->fs_info to callers of __setup_root() Signed-off-by: Al Viro --- fs/btrfs/disk-io.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dcb5d949b54..04eba10af82 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1142,7 +1142,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->orphan_item_inserted = 0; root->orphan_cleanup_state = 0; - root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; root->highest_objectid = 0; @@ -1193,6 +1192,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root, u32 blocksize; u64 generation; + root->fs_info = fs_info; __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, root, fs_info, objectid); @@ -1227,6 +1227,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, if (!root) return ERR_PTR(-ENOMEM); + root->fs_info = fs_info; __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, root, fs_info, BTRFS_TREE_LOG_OBJECTID); @@ -1330,6 +1331,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, goto out; } + root->fs_info = fs_info; __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, root, fs_info, location->objectid); @@ -2055,6 +2057,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->transaction_blocked_wait); init_waitqueue_head(&fs_info->async_submit_wait); + tree_root->fs_info = fs_info; __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -2247,6 +2250,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_super_chunk_root_level(disk_super)); generation = btrfs_super_chunk_root_generation(disk_super); + chunk_root->fs_info = fs_info; __setup_root(nodesize, leafsize, sectorsize, stripesize, chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); @@ -2373,6 +2377,7 @@ retry_root_backup: goto fail_trans_kthread; } + log_tree_root->fs_info = fs_info; __setup_root(nodesize, leafsize, sectorsize, stripesize, log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); -- cgit v1.2.3 From 1233f546ec29ac32424327baf6ae5df81e3d9eae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 00:34:00 -0500 Subject: btrfs: sanitizing ->fs_info, part 2 lift assignment to callers of 
find_and_setup_root() Signed-off-by: Al Viro --- fs/btrfs/disk-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 04eba10af82..b8dae517f80 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1192,7 +1192,6 @@ static int find_and_setup_root(struct btrfs_root *tree_root, u32 blocksize; u64 generation; - root->fs_info = fs_info; __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, root, fs_info, objectid); @@ -1322,6 +1321,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, if (!root) return ERR_PTR(-ENOMEM); if (location->offset == (u64)-1) { + root->fs_info = fs_info; ret = find_and_setup_root(tree_root, fs_info, location->objectid, root); if (ret) { @@ -2300,18 +2300,21 @@ retry_root_backup: btrfs_set_root_node(&tree_root->root_item, tree_root->node); tree_root->commit_root = btrfs_root_node(tree_root); + extent_root->fs_info = fs_info; ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); if (ret) goto recovery_tree_root; extent_root->track_dirty = 1; + dev_root->fs_info = fs_info; ret = find_and_setup_root(tree_root, fs_info, BTRFS_DEV_TREE_OBJECTID, dev_root); if (ret) goto recovery_tree_root; dev_root->track_dirty = 1; + csum_root->fs_info = fs_info; ret = find_and_setup_root(tree_root, fs_info, BTRFS_CSUM_TREE_OBJECTID, csum_root); if (ret) -- cgit v1.2.3 From 38a77db49ad8f78369dcdfb693b8e5a818a60104 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 00:38:54 -0500 Subject: btrfs: sanitizing ->fs_info, part 3 move assignments to ->fs_info in open_ctree() up, to the place just after the original allocations. Assignment for tree_root becomes a no-op - we'd obtained fs_info from tree_root->fs_info in the first place. 
Signed-off-by: Al Viro --- fs/btrfs/disk-io.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b8dae517f80..06480fcf0fb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1320,8 +1320,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); + root->fs_info = fs_info; if (location->offset == (u64)-1) { - root->fs_info = fs_info; ret = find_and_setup_root(tree_root, fs_info, location->objectid, root); if (ret) { @@ -1331,7 +1331,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, goto out; } - root->fs_info = fs_info; __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, root, fs_info, location->objectid); @@ -1914,6 +1913,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -ENOMEM; goto fail; } + chunk_root->fs_info = fs_info; + extent_root->fs_info = fs_info; + dev_root->fs_info = fs_info; + csum_root->fs_info = fs_info; ret = init_srcu_struct(&fs_info->subvol_srcu); if (ret) { @@ -2057,7 +2060,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->transaction_blocked_wait); init_waitqueue_head(&fs_info->async_submit_wait); - tree_root->fs_info = fs_info; __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -2250,7 +2252,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_super_chunk_root_level(disk_super)); generation = btrfs_super_chunk_root_generation(disk_super); - chunk_root->fs_info = fs_info; __setup_root(nodesize, leafsize, sectorsize, stripesize, chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); @@ -2300,21 +2301,18 @@ retry_root_backup: btrfs_set_root_node(&tree_root->root_item, tree_root->node); tree_root->commit_root = btrfs_root_node(tree_root); - extent_root->fs_info = fs_info; ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); if (ret) goto recovery_tree_root; extent_root->track_dirty = 1; - dev_root->fs_info = fs_info; ret = find_and_setup_root(tree_root, fs_info, BTRFS_DEV_TREE_OBJECTID, dev_root); if (ret) goto recovery_tree_root; dev_root->track_dirty = 1; - csum_root->fs_info = fs_info; ret = find_and_setup_root(tree_root, fs_info, BTRFS_CSUM_TREE_OBJECTID, csum_root); if (ret) -- cgit v1.2.3 From 6f07e42ee6fcc252a210781d7262f4051e9fd8f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 00:46:16 -0500 Subject: btrfs: sanitizing ->fs_info, part 4 A new helper: btrfs_alloc_root(fs_info); allocates btrfs_root and sets ->fs_info. All places allocating the suckers converted to it. At that point we *never* reassign ->fs_info of btrfs_root; it's set before anyone sees the address of newly allocated struct btrfs_root and never assigned anywhere else. 
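[Editorial aside, not part of the patch: the point of the helper is that allocation and context binding happen in one step, so no caller ever touches ->fs_info afterwards. The before/after idiom, sketched:]

	/* before: two steps, and ->fs_info is visible as NULL in between */
	root = kzalloc(sizeof(*root), GFP_NOFS);
	if (root)
		root->fs_info = fs_info;

	/* after: one step; ->fs_info is set before the root escapes anywhere */
	root = btrfs_alloc_root(fs_info);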
Signed-off-by: Al Viro --- fs/btrfs/disk-io.c | 33 +++++++++++++++------------------ fs/btrfs/disk-io.h | 2 ++ fs/btrfs/super.c | 3 +-- 3 files changed, 18 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 06480fcf0fb..ee846ce5884 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1215,6 +1215,14 @@ static int find_and_setup_root(struct btrfs_root *tree_root, return 0; } +struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); + if (root) + root->fs_info = fs_info; + return root; +} + static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { @@ -1222,11 +1230,10 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root = fs_info->tree_root; struct extent_buffer *leaf; - root = kzalloc(sizeof(*root), GFP_NOFS); + root = btrfs_alloc_root(fs_info); if (!root) return ERR_PTR(-ENOMEM); - root->fs_info = fs_info; __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, root, fs_info, BTRFS_TREE_LOG_OBJECTID); @@ -1317,10 +1324,9 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, u32 blocksize; int ret = 0; - root = kzalloc(sizeof(*root), GFP_NOFS); + root = btrfs_alloc_root(fs_info); if (!root) return ERR_PTR(-ENOMEM); - root->fs_info = fs_info; if (location->offset == (u64)-1) { ret = find_and_setup_root(tree_root, fs_info, location->objectid, root); @@ -1900,23 +1906,15 @@ struct btrfs_root *open_ctree(struct super_block *sb, int num_backups_tried = 0; int backup_index = 0; - extent_root = fs_info->extent_root = - kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - csum_root = fs_info->csum_root = - kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - chunk_root = fs_info->chunk_root = - kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - dev_root = fs_info->dev_root = - kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + extent_root = fs_info->extent_root = btrfs_alloc_root(fs_info); + csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); + chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); + dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); if (!extent_root || !csum_root || !chunk_root || !dev_root) { err = -ENOMEM; goto fail; } - chunk_root->fs_info = fs_info; - extent_root->fs_info = fs_info; - dev_root->fs_info = fs_info; - csum_root->fs_info = fs_info; ret = init_srcu_struct(&fs_info->subvol_srcu); if (ret) { @@ -2372,13 +2370,12 @@ retry_root_backup: btrfs_level_size(tree_root, btrfs_super_log_root_level(disk_super)); - log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + log_tree_root = btrfs_alloc_root(fs_info); if (!log_tree_root) { err = -ENOMEM; goto fail_trans_kthread; } - log_tree_root->fs_info = fs_info; __setup_root(nodesize, leafsize, sectorsize, stripesize, log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c99d0a8f13f..2bb5f59ddf9 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -86,6 +86,8 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root); +struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info); + #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_init_lockdep(void); void btrfs_set_buffer_lockdep_class(u64 objectid, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 
b9fd62a0fca..e9f876a1655 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -901,12 +901,11 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (!fs_info) return ERR_PTR(-ENOMEM); - fs_info->tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + fs_info->tree_root = btrfs_alloc_root(fs_info); if (!fs_info->tree_root) { error = -ENOMEM; goto error_fs_info; } - fs_info->tree_root->fs_info = fs_info; fs_info->fs_devices = fs_devices; fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); -- cgit v1.2.3 From e3029d9fd426c8f582210ba35551ae5506218345 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 00:56:18 -0500 Subject: btrfs: sanitizing ->fs_info, part 5 close_ctree() uses a weird mix of accesses to root->fs_info and its value at the beginning of function stored in local variable. Since ->fs_info *never* changes, let's just use the local variable to avoid confusion. Signed-off-by: Al Viro --- fs/btrfs/disk-io.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ee846ce5884..9308c7f13c1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2982,7 +2982,7 @@ int close_ctree(struct btrfs_root *root) (atomic_read(&fs_info->defrag_running) == 0)); /* clear out the rbtree of defraggable inodes */ - btrfs_run_defrag_inodes(root->fs_info); + btrfs_run_defrag_inodes(fs_info); /* * Here come 2 situations when btrfs is broken to flip readonly: @@ -3011,8 +3011,8 @@ int close_ctree(struct btrfs_root *root) btrfs_put_block_group_cache(fs_info); - kthread_stop(root->fs_info->transaction_kthread); - kthread_stop(root->fs_info->cleaner_kthread); + kthread_stop(fs_info->transaction_kthread); + kthread_stop(fs_info->cleaner_kthread); fs_info->closing = 2; smp_mb(); @@ -3030,14 +3030,14 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(fs_info->extent_root->commit_root); free_extent_buffer(fs_info->tree_root->node); free_extent_buffer(fs_info->tree_root->commit_root); - free_extent_buffer(root->fs_info->chunk_root->node); - free_extent_buffer(root->fs_info->chunk_root->commit_root); - free_extent_buffer(root->fs_info->dev_root->node); - free_extent_buffer(root->fs_info->dev_root->commit_root); - free_extent_buffer(root->fs_info->csum_root->node); - free_extent_buffer(root->fs_info->csum_root->commit_root); - - btrfs_free_block_groups(root->fs_info); + free_extent_buffer(fs_info->chunk_root->node); + free_extent_buffer(fs_info->chunk_root->commit_root); + free_extent_buffer(fs_info->dev_root->node); + free_extent_buffer(fs_info->dev_root->commit_root); + free_extent_buffer(fs_info->csum_root->node); + free_extent_buffer(fs_info->csum_root->commit_root); + + btrfs_free_block_groups(fs_info); del_fs_roots(fs_info); -- cgit v1.2.3 From ad2b2c802be2d3e8ed8364fef5ffaddabe448219 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 01:10:02 -0500 Subject: btrfs: make open_ctree() return int It returns either ERR_PTR(-ve) or sb->s_fs_info. The latter can be found by caller just as well, TYVM, no need to return it. Just return -ve or 0... 
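[Editorial aside, not part of the patch: on the caller side the conversion looks like this -- the status comes back as an int, and the root is fetched from sb->s_fs_info instead of the return value:]

	err = open_ctree(sb, fs_devices, (char *)data);
	if (err)
		return err;
	tree_root = sb->s_fs_info;   /* what open_ctree() used to hand back */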
Signed-off-by: Al Viro --- fs/btrfs/disk-io.c | 12 ++++++------ fs/btrfs/disk-io.h | 6 +++--- fs/btrfs/super.c | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9308c7f13c1..78247522c69 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1880,9 +1880,9 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) } -struct btrfs_root *open_ctree(struct super_block *sb, - struct btrfs_fs_devices *fs_devices, - char *options) +int open_ctree(struct super_block *sb, + struct btrfs_fs_devices *fs_devices, + char *options) { u32 sectorsize; u32 nodesize; @@ -2428,11 +2428,11 @@ retry_root_backup: if (err) { close_ctree(tree_root); free_fs_info(fs_info); - return ERR_PTR(err); + return err; } } - return tree_root; + return 0; fail_trans_kthread: kthread_stop(fs_info->transaction_kthread); @@ -2479,7 +2479,7 @@ fail_srcu: fail: btrfs_close_devices(fs_info->fs_devices); free_fs_info(fs_info); - return ERR_PTR(err); + return err; recovery_tree_root: if (!btrfs_test_opt(tree_root, RECOVERY)) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 2bb5f59ddf9..6f5f48778b0 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -46,9 +46,9 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); -struct btrfs_root *open_ctree(struct super_block *sb, - struct btrfs_fs_devices *fs_devices, - char *options); +int open_ctree(struct super_block *sb, + struct btrfs_fs_devices *fs_devices, + char *options); int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, int max_mirrors); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e9f876a1655..56e007fd670 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -604,12 +604,12 @@ static int btrfs_fill_super(struct super_block *sb, sb->s_flags |= MS_POSIXACL; #endif - tree_root = open_ctree(sb, fs_devices, (char *)data); - - if (IS_ERR(tree_root)) { + err = open_ctree(sb, fs_devices, (char *)data); + if (err) { printk("btrfs: open_ctree failed\n"); - return PTR_ERR(tree_root); + return err; } + tree_root = sb->s_fs_info; fs_info = tree_root->fs_info; sb->s_fs_info = tree_root; -- cgit v1.2.3 From 29db78aa0ac82319b764b87a1c5030d74523e296 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 01:12:38 -0500 Subject: btrfs: kill pointless reassignment of ->s_fs_info in btrfs_fill_super() We do not (fortunately) modify ->s_fs_info of superblock on the fly in btrfs_fill_super(); apparent assignment is a no-op. 
Signed-off-by: Al Viro --- fs/btrfs/super.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 56e007fd670..a381f9f9b0c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -588,8 +588,8 @@ static int btrfs_fill_super(struct super_block *sb, { struct inode *inode; struct dentry *root_dentry; - struct btrfs_root *tree_root; - struct btrfs_fs_info *fs_info; + struct btrfs_root *tree_root = sb->s_fs_info; + struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_key key; int err; @@ -609,9 +609,6 @@ static int btrfs_fill_super(struct super_block *sb, printk("btrfs: open_ctree failed\n"); return err; } - tree_root = sb->s_fs_info; - fs_info = tree_root->fs_info; - sb->s_fs_info = tree_root; key.objectid = BTRFS_FIRST_FREE_OBJECTID; key.type = BTRFS_INODE_ITEM_KEY; -- cgit v1.2.3 From be7e0950def403e90b5295ff2192c39967bf2aec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 01:40:05 -0500 Subject: btrfs: merge free_fs_info() calls on fill_super failures ... all the way up into btrfs_mount(). Signed-off-by: Al Viro --- fs/btrfs/disk-io.c | 2 -- fs/btrfs/super.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 78247522c69..b6d11eb4e43 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2427,7 +2427,6 @@ retry_root_backup: up_read(&fs_info->cleanup_work_sem); if (err) { close_ctree(tree_root); - free_fs_info(fs_info); return err; } } @@ -2478,7 +2477,6 @@ fail_srcu: cleanup_srcu_struct(&fs_info->subvol_srcu); fail: btrfs_close_devices(fs_info->fs_devices); - free_fs_info(fs_info); return err; recovery_tree_root: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a381f9f9b0c..8901b6c8526 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -634,7 +634,6 @@ static int btrfs_fill_super(struct super_block *sb, fail_close: close_ctree(tree_root); - free_fs_info(fs_info); return err; } @@ -947,6 +946,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); if (error) { + free_fs_info(fs_info); deactivate_locked_super(s); return ERR_PTR(error); } -- cgit v1.2.3 From d22ca7de770e2a683eac000ba4db5a95e2f4c969 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 01:46:50 -0500 Subject: btrfs: make free_fs_info() call ->kill_sb() unconditional ... and don't bother with it after btrfs_fill_super() failure - ->kill_sb() (unlike ->put_super()) will be called even if we have not got non-NULL ->s_root. Signed-off-by: Al Viro --- fs/btrfs/super.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8901b6c8526..f628a6a79ca 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -946,7 +946,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 
1 : 0); if (error) { - free_fs_info(fs_info); deactivate_locked_super(s); return ERR_PTR(error); } @@ -1215,12 +1214,9 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) static void btrfs_kill_super(struct super_block *sb) { - struct btrfs_fs_info *fs_info = NULL; - if (sb->s_root) - fs_info = btrfs_sb(sb)->fs_info; + struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; kill_anon_super(sb); - if (fs_info) - free_fs_info(fs_info); + free_fs_info(fs_info); } static struct file_system_type btrfs_fs_type = { -- cgit v1.2.3 From 59553edf110e5576d91be9dd5bd53d110e0d0290 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 01:56:28 -0500 Subject: btrfs: consolidate failure exits in btrfs_mount() a bit Signed-off-by: Al Viro --- fs/btrfs/super.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f628a6a79ca..15c8ae571e4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -630,6 +630,7 @@ static int btrfs_fill_super(struct super_block *sb, save_mount_options(sb, data); cleancache_init_fs(sb); + sb->s_flags |= MS_ACTIVE; return 0; fail_close: @@ -929,14 +930,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } if (s->s_root) { - if ((flags ^ s->s_flags) & MS_RDONLY) { - deactivate_locked_super(s); - error = -EBUSY; - goto error_close_devices; - } - btrfs_close_devices(fs_devices); free_fs_info(fs_info); + if ((flags ^ s->s_flags) & MS_RDONLY) + error = -EBUSY; } else { char b[BDEVNAME_SIZE]; @@ -945,19 +942,11 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, btrfs_sb(s)->fs_info->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); - if (error) { - deactivate_locked_super(s); - return ERR_PTR(error); - } - - s->s_flags |= MS_ACTIVE; } - root = get_default_root(s, subvol_objectid); - if (IS_ERR(root)) { + root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error); + if (IS_ERR(root)) deactivate_locked_super(s); - return root; - } return root; -- cgit v1.2.3 From 815745cf3e46681241ad8025602ffbf2a452d514 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 15:40:49 -0500 Subject: btrfs: let ->s_fs_info point to fs_info, not root... the latter can be obtained from the former (by looking as ->tree_root) just as cheaply as we currently are doing the other way round. 
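[Editorial aside, not part of the patch: after the inversion, the superblock accessor hands back the fs_info directly and the tree root sits one dereference away, mirroring how fs_info used to be reached from the root:]

	static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
	{
		return sb->s_fs_info;    /* now the fs_info, no longer the tree root */
	}

	/* and when a root is actually needed: */
	struct btrfs_root *root = btrfs_sb(sb)->tree_root;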
Signed-off-by: Al Viro --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 4 +-- fs/btrfs/export.c | 2 +- fs/btrfs/ioctl.c | 7 +++-- fs/btrfs/super.c | 75 +++++++++++++++++++++++++++--------------------------- 5 files changed, 45 insertions(+), 45 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 67385033323..dbb999a3827 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2196,7 +2196,7 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, return btrfs_item_size(eb, e) - offset; } -static inline struct btrfs_root *btrfs_sb(struct super_block *sb) +static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b6d11eb4e43..f7d8b9ba519 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1894,8 +1894,8 @@ int open_ctree(struct super_block *sb, struct btrfs_key location; struct buffer_head *bh; struct btrfs_super_block *disk_super; - struct btrfs_root *tree_root = btrfs_sb(sb); - struct btrfs_fs_info *fs_info = tree_root->fs_info; + struct btrfs_fs_info *fs_info = btrfs_sb(sb); + struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *extent_root; struct btrfs_root *csum_root; struct btrfs_root *chunk_root; diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 1b8dc33778f..5f77166fd01 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -67,7 +67,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, u64 root_objectid, u32 generation, int check_generation) { - struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; + struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root; struct inode *inode; struct btrfs_key key; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5441ff1480f..0c55f8f3809 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -276,14 +276,13 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) { - struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info; - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb); struct btrfs_device *device; struct request_queue *q; struct fstrim_range range; u64 minlen = ULLONG_MAX; u64 num_devices = 0; - u64 total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); + u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); int ret; if (!capable(CAP_SYS_ADMIN)) @@ -312,7 +311,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) range.len = min(range.len, total_bytes - range.start); range.minlen = max(range.minlen, minlen); - ret = btrfs_trim_fs(root, &range); + ret = btrfs_trim_fs(fs_info->tree_root, &range); if (ret < 0) return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 15c8ae571e4..305adf6dc09 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -147,7 +147,7 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, static void btrfs_put_super(struct super_block *sb) { - (void)close_ctree(btrfs_sb(sb)); + (void)close_ctree(btrfs_sb(sb)->tree_root); /* FIXME: need to fix VFS to return error? */ /* AV: return it _where_? 
->put_super() can be triggered by any number * of async events, up to and including delivery of SIGKILL to the @@ -500,7 +500,8 @@ out: static struct dentry *get_default_root(struct super_block *sb, u64 subvol_objectid) { - struct btrfs_root *root = sb->s_fs_info; + struct btrfs_fs_info *fs_info = btrfs_sb(sb); + struct btrfs_root *root = fs_info->tree_root; struct btrfs_root *new_root; struct btrfs_dir_item *di; struct btrfs_path *path; @@ -530,7 +531,7 @@ static struct dentry *get_default_root(struct super_block *sb, * will mount by default if we haven't been given a specific subvolume * to mount. */ - dir_id = btrfs_super_root_dir(root->fs_info->super_copy); + dir_id = btrfs_super_root_dir(fs_info->super_copy); di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); if (IS_ERR(di)) { btrfs_free_path(path); @@ -544,7 +545,7 @@ static struct dentry *get_default_root(struct super_block *sb, */ btrfs_free_path(path); dir_id = BTRFS_FIRST_FREE_OBJECTID; - new_root = root->fs_info->fs_root; + new_root = fs_info->fs_root; goto setup_root; } @@ -552,7 +553,7 @@ static struct dentry *get_default_root(struct super_block *sb, btrfs_free_path(path); find_root: - new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); + new_root = btrfs_read_fs_root_no_name(fs_info, &location); if (IS_ERR(new_root)) return ERR_CAST(new_root); @@ -588,8 +589,7 @@ static int btrfs_fill_super(struct super_block *sb, { struct inode *inode; struct dentry *root_dentry; - struct btrfs_root *tree_root = sb->s_fs_info; - struct btrfs_fs_info *fs_info = tree_root->fs_info; + struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_key key; int err; @@ -634,20 +634,21 @@ static int btrfs_fill_super(struct super_block *sb, return 0; fail_close: - close_ctree(tree_root); + close_ctree(fs_info->tree_root); return err; } int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; - struct btrfs_root *root = btrfs_sb(sb); + struct btrfs_fs_info *fs_info = btrfs_sb(sb); + struct btrfs_root *root = fs_info->tree_root; int ret; trace_btrfs_sync_fs(wait); if (!wait) { - filemap_flush(root->fs_info->btree_inode->i_mapping); + filemap_flush(fs_info->btree_inode->i_mapping); return 0; } @@ -663,8 +664,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) { - struct btrfs_root *root = btrfs_sb(dentry->d_sb); - struct btrfs_fs_info *info = root->fs_info; + struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); + struct btrfs_root *root = info->tree_root; char *compress_type; if (btrfs_test_opt(root, DEGRADED)) @@ -727,10 +728,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) static int btrfs_test_super(struct super_block *s, void *data) { - struct btrfs_root *test_root = data; - struct btrfs_root *root = btrfs_sb(s); + struct btrfs_fs_info *p = data; + struct btrfs_fs_info *fs_info = btrfs_sb(s); - return root->fs_info->fs_devices == test_root->fs_info->fs_devices; + return fs_info->fs_devices == p->fs_devices; } static int btrfs_set_super(struct super_block *s, void *data) @@ -922,8 +923,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } bdev = fs_devices->latest_bdev; - s = sget(fs_type, btrfs_test_super, btrfs_set_super, - fs_info->tree_root); + s = sget(fs_type, btrfs_test_super, btrfs_set_super, fs_info); if (IS_ERR(s)) { error = PTR_ERR(s); goto error_close_devices; @@ -939,7 +939,7 @@ static struct dentry *btrfs_mount(struct file_system_type 
*fs_type, int flags, s->s_flags = flags | MS_NOSEC; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); - btrfs_sb(s)->fs_info->bdev_holder = fs_type; + btrfs_sb(s)->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); } @@ -959,7 +959,8 @@ error_fs_info: static int btrfs_remount(struct super_block *sb, int *flags, char *data) { - struct btrfs_root *root = btrfs_sb(sb); + struct btrfs_fs_info *fs_info = btrfs_sb(sb); + struct btrfs_root *root = fs_info->tree_root; int ret; ret = btrfs_parse_options(root, data); @@ -975,13 +976,13 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) ret = btrfs_commit_super(root); WARN_ON(ret); } else { - if (root->fs_info->fs_devices->rw_devices == 0) + if (fs_info->fs_devices->rw_devices == 0) return -EACCES; - if (btrfs_super_log_root(root->fs_info->super_copy) != 0) + if (btrfs_super_log_root(fs_info->super_copy) != 0) return -EINVAL; - ret = btrfs_cleanup_fs_roots(root->fs_info); + ret = btrfs_cleanup_fs_roots(fs_info); WARN_ON(ret); /* recover relocation */ @@ -1150,18 +1151,18 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct btrfs_root *root = btrfs_sb(dentry->d_sb); - struct btrfs_super_block *disk_super = root->fs_info->super_copy; - struct list_head *head = &root->fs_info->space_info; + struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); + struct btrfs_super_block *disk_super = fs_info->super_copy; + struct list_head *head = &fs_info->space_info; struct btrfs_space_info *found; u64 total_used = 0; u64 total_free_data = 0; int bits = dentry->d_sb->s_blocksize_bits; - __be32 *fsid = (__be32 *)root->fs_info->fsid; + __be32 *fsid = (__be32 *)fs_info->fsid; int ret; /* holding chunk_muext to avoid allocating new chunks */ - mutex_lock(&root->fs_info->chunk_mutex); + mutex_lock(&fs_info->chunk_mutex); rcu_read_lock(); list_for_each_entry_rcu(found, head, list) { if (found->flags & BTRFS_BLOCK_GROUP_DATA) { @@ -1180,14 +1181,14 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; buf->f_bavail = total_free_data; - ret = btrfs_calc_avail_data_space(root, &total_free_data); + ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); if (ret) { - mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&fs_info->chunk_mutex); return ret; } buf->f_bavail += total_free_data; buf->f_bavail = buf->f_bavail >> bits; - mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&fs_info->chunk_mutex); /* We treat it as constant endianness (it doesn't matter _which_) because we want the fsid to come out the same whether mounted @@ -1203,7 +1204,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) static void btrfs_kill_super(struct super_block *sb) { - struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; + struct btrfs_fs_info *fs_info = btrfs_sb(sb); kill_anon_super(sb); free_fs_info(fs_info); } @@ -1246,17 +1247,17 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, static int btrfs_freeze(struct super_block *sb) { - struct btrfs_root *root = btrfs_sb(sb); - mutex_lock(&root->fs_info->transaction_kthread_mutex); - mutex_lock(&root->fs_info->cleaner_mutex); + struct btrfs_fs_info *fs_info = btrfs_sb(sb); + mutex_lock(&fs_info->transaction_kthread_mutex); + mutex_lock(&fs_info->cleaner_mutex); return 0; } static int 
btrfs_unfreeze(struct super_block *sb) { - struct btrfs_root *root = btrfs_sb(sb); - mutex_unlock(&root->fs_info->cleaner_mutex); - mutex_unlock(&root->fs_info->transaction_kthread_mutex); + struct btrfs_fs_info *fs_info = btrfs_sb(sb); + mutex_unlock(&fs_info->cleaner_mutex); + mutex_unlock(&fs_info->transaction_kthread_mutex); return 0; } -- cgit v1.2.3 From f84a8bd60e3ee49eacc9ba824babf149ba3dad7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 15:57:57 -0500 Subject: btrfs: take allocation of ->tree_root into open_ctree() now that we don't need it for sget() anymore... Signed-off-by: Al Viro --- fs/btrfs/disk-io.c | 8 +++++--- fs/btrfs/disk-io.h | 2 -- fs/btrfs/super.c | 5 ----- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f7d8b9ba519..dd71be87593 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1215,7 +1215,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root, return 0; } -struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) +static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) { struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); if (root) @@ -1895,7 +1895,7 @@ int open_ctree(struct super_block *sb, struct buffer_head *bh; struct btrfs_super_block *disk_super; struct btrfs_fs_info *fs_info = btrfs_sb(sb); - struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *tree_root; struct btrfs_root *extent_root; struct btrfs_root *csum_root; struct btrfs_root *chunk_root; @@ -1906,12 +1906,14 @@ int open_ctree(struct super_block *sb, int num_backups_tried = 0; int backup_index = 0; + tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); extent_root = fs_info->extent_root = btrfs_alloc_root(fs_info); csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); - if (!extent_root || !csum_root || !chunk_root || !dev_root) { + if (!tree_root || !extent_root || !csum_root || + !chunk_root || !dev_root) { err = -ENOMEM; goto fail; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 6f5f48778b0..e4bc4741319 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -86,8 +86,6 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info); - #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_init_lockdep(void); void btrfs_set_buffer_lockdep_class(u64 objectid, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 305adf6dc09..044652ee7ef 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -899,11 +899,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (!fs_info) return ERR_PTR(-ENOMEM); - fs_info->tree_root = btrfs_alloc_root(fs_info); - if (!fs_info->tree_root) { - error = -ENOMEM; - goto error_fs_info; - } fs_info->fs_devices = fs_devices; fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); -- cgit v1.2.3 From 3850aba74873aa47fefe6900b99f42f5e656a6e7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 8 Jan 2012 19:40:27 -0500 Subject: devpts: fix double-free on mount failure devpts_kill_sb() is called even if devpts_fill_super() fails; we should not do that kfree() in the latter, especially not with ->s_fs_info left pointing to freed object. Double kfree() is a Bad Thing(tm)... 
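To spell out the ownership rule the fix restores (a sketch with made-up example_* names, not the devpts code itself): once ->s_fs_info has been attached to the superblock, only the ->kill_sb() path may free it, so a failing fill_super must leave it alone:

	static int example_fill_super(struct super_block *s, void *data, int silent)
	{
		struct inode *inode;

		/* s->s_fs_info was already allocated and attached by the caller */
		inode = new_inode(s);
		if (!inode)
			return -ENOMEM;	/* unwind, but do NOT kfree(s->s_fs_info) */
		/* ... rest of the setup ... */
		return 0;
	}

	static void example_kill_sb(struct super_block *s)
	{
		void *fsi = s->s_fs_info;

		kill_anon_super(s);
		kfree(fsi);		/* freed exactly once, on the teardown path */
	}
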
Signed-off-by: Al Viro --- fs/devpts/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 79673eb7115..c4e2a58a2e8 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -301,7 +301,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent) inode = new_inode(s); if (!inode) - goto free_fsi; + goto fail; inode->i_ino = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; @@ -316,8 +316,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent) printk(KERN_ERR "devpts: get root dentry failed\n"); iput(inode); -free_fsi: - kfree(s->s_fs_info); fail: return -ENOMEM; } -- cgit v1.2.3 From da01636a6511c3bd0c1cf546c47b8e92a837a613 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 8 Jan 2012 19:45:28 -0500 Subject: exofs: oops after late failure in mount We have already set ->s_root, so ->put_super() is going to be called. Freeing ->s_fs_info is a bloody bad idea when it's going to be dereferenced very shortly... Signed-off-by: Al Viro --- fs/exofs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 8addfe314dc..d22cd168c6e 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -838,6 +838,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); if (ret) { EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); + dput(sb->s_root); + sb->s_root = NULL; goto free_sbi; } -- cgit v1.2.3 From 0ce8c0109f548ed75535d96ec5a347b410ed1472 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 8 Jan 2012 19:50:23 -0500 Subject: ext[34]: avoid i_nlink warnings triggered by drop_nlink/inc_nlink kludge in symlink() Both ext3 and ext4 put the half-created symlink inode into the orphan list for a while (see the comment in ext[34]_symlink() for gory details). Then, if everything went fine, they pull it out of the orphan list and bump the link count back to 1. The thing is, inc_nlink() is going to complain about seeing somebody changing i_nlink from 0 to 1. With a good reason, since normally something like that is a bug. Explicit set_nlink(inode, 1) does the same thing as inc_nlink() here, but it does *not* complain - exactly because it should be usable in strange situations like this one. Signed-off-by: Al Viro --- fs/ext3/namei.c | 2 +- fs/ext4/namei.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 4f35b2f315d..d269821203f 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2272,7 +2272,7 @@ retry: err = PTR_ERR(handle); goto err_drop_inode; } - inc_nlink(inode); + set_nlink(inode, 1); err = ext3_orphan_del(handle, inode); if (err) { ext3_journal_stop(handle); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 86edc45b52a..2043f482375 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2315,7 +2315,7 @@ retry: err = PTR_ERR(handle); goto err_drop_inode; } - inc_nlink(inode); + set_nlink(inode, 1); err = ext4_orphan_del(handle, inode); if (err) { ext4_journal_stop(handle); -- cgit v1.2.3 From d03e1292c46721f60830c5d2e334966472002ed0 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 5 Dec 2011 15:55:11 +0800 Subject: ext3: replace ll_rw_block with other functions ll_rw_block() is deprecated. Thus we replace it with other functions. 
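The hunks below all follow the same shape; as a simplified sketch (bh and err are assumed to come from the surrounding function, this is not a literal hunk), the deprecated ll_rw_block() call becomes either the bh_uptodate_or_lock()/bh_submit_read() pair or, where REQ_META | REQ_PRIO must be passed explicitly, an open-coded submit_bh():

	/*
	 * bh_uptodate_or_lock() returns 1 if the buffer is already uptodate,
	 * otherwise it locks the buffer and returns 0.
	 */
	if (!bh_uptodate_or_lock(bh))
		err = bh_submit_read(bh);	/* submits the read and waits; 0 on success, -EIO on error */

	/* Variant with explicit request flags (readahead paths skip the wait): */
	if (bh && !bh_uptodate_or_lock(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ | REQ_META | REQ_PRIO, bh);
		wait_on_buffer(bh);
	}
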
CC: Jan Kara Signed-off-by: Zheng Liu Signed-off-by: Jan Kara --- fs/ext3/inode.c | 14 +++++++------- fs/ext3/namei.c | 9 ++++++--- fs/ext3/super.c | 10 +++++----- 3 files changed, 18 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 9da9125ba3e..a8d3217c5cf 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1136,9 +1136,11 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, bh = ext3_getblk(handle, inode, block, create, err); if (!bh) return bh; - if (buffer_uptodate(bh)) + if (bh_uptodate_or_lock(bh)) return bh; - ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -2068,12 +2070,10 @@ static int ext3_block_truncate_page(struct inode *inode, loff_t from) if (PageUptodate(page)) set_buffer_uptodate(bh); - if (!buffer_uptodate(bh)) { - err = -EIO; - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); + if (!bh_uptodate_or_lock(bh)) { + err = bh_submit_read(bh); /* Uhhuh. Read error. Complain and punt. */ - if (!buffer_uptodate(bh)) + if (err) goto unlock; } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 642dc6d66df..337c3f3375c 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -921,9 +921,12 @@ restart: num++; bh = ext3_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; - if (bh) - ll_rw_block(READ | REQ_META | REQ_PRIO, - 1, &bh); + if (bh && !bh_uptodate_or_lock(bh)) { + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ | REQ_META | REQ_PRIO, + bh); + } } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 767fa3a2bd1..662cef4569a 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2231,11 +2231,11 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, goto out_bdev; } journal->j_private = sb; - ll_rw_block(READ, 1, &journal->j_sb_buffer); - wait_on_buffer(journal->j_sb_buffer); - if (!buffer_uptodate(journal->j_sb_buffer)) { - ext3_msg(sb, KERN_ERR, "I/O error on journal device"); - goto out_journal; + if (!bh_uptodate_or_lock(journal->j_sb_buffer)) { + if (bh_submit_read(journal->j_sb_buffer)) { + ext3_msg(sb, KERN_ERR, "I/O error on journal device"); + goto out_journal; + } } if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { ext3_msg(sb, KERN_ERR, -- cgit v1.2.3 From 1415dd8705394399d59a3df1ab48d149e1e41e77 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 8 Dec 2011 21:13:46 +0100 Subject: ext3: Fix error handling on inode bitmap corruption When insert_inode_locked() fails in ext3_new_inode() it most likely means inode bitmap got corrupted and we allocated again inode which is already in use. Also doing unlock_new_inode() during error recovery is wrong since inode does not have I_NEW set. Fix the problem by jumping to fail: (instead of fail_drop:) which declares filesystem error and does not call unlock_new_inode(). Reviewed-by: Eric Sandeen Signed-off-by: Jan Kara --- fs/ext3/ialloc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 5c866e06e7a..adae962ee95 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -525,8 +525,12 @@ got: if (IS_DIRSYNC(inode)) handle->h_sync = 1; if (insert_inode_locked(inode) < 0) { - err = -EINVAL; - goto fail_drop; + /* + * Likely a bitmap corruption causing inode to be allocated + * twice. 
+ */ + err = -EIO; + goto fail; } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; -- cgit v1.2.3 From ef6919c283257155def420bd247140e9fd2e9843 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 9 Dec 2011 00:08:58 +0100 Subject: ext2: Fix error handling on inode bitmap corruption When insert_inode_locked() fails in ext2_new_inode() it most likely means inode bitmap got corrupted and we allocated again inode which is already in use. Also doing unlock_new_inode() during error recovery is wrong since the inode does not have I_NEW set. Fix the problem by informing about filesystem error and jumping to fail: (instead of fail_drop:) which doesn't call unlock_new_inode(). Reviewed-by: Eric Sandeen Signed-off-by: Jan Kara --- fs/ext2/ialloc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index c4e81dfb74b..78502c16681 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -573,8 +573,11 @@ got: inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); if (insert_inode_locked(inode) < 0) { - err = -EINVAL; - goto fail_drop; + ext2_error(sb, "ext2_new_inode", + "inode number already in use - inode=%lu", + (unsigned long) ino); + err = -EIO; + goto fail; } dquot_initialize(inode); -- cgit v1.2.3 From 7b0b0933a3ff6052addf4d49ea99f75ab27df2d0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 10 Dec 2011 01:43:33 +0100 Subject: udf: Cleanup calling convention of inode_getblk() inode_getblk() always returned NULL and passed results in its parameters. Make the function return something useful - found block number. Signed-off-by: Jan Kara --- fs/udf/inode.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 4fd1d809738..1bd2c42f3b4 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -53,8 +53,7 @@ static int udf_update_inode(struct inode *, int); static void udf_fill_inode(struct inode *, struct buffer_head *); static int udf_sync_inode(struct inode *inode); static int udf_alloc_i_data(struct inode *inode, size_t size); -static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, - sector_t *, int *); +static sector_t inode_getblk(struct inode *, sector_t, int *, int *); static int8_t udf_insert_aext(struct inode *, struct extent_position, struct kernel_lb_addr, uint32_t); static void udf_split_extents(struct inode *, int *, int, int, @@ -310,7 +309,6 @@ static int udf_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create) { int err, new; - struct buffer_head *bh; sector_t phys = 0; struct udf_inode_info *iinfo; @@ -323,7 +321,6 @@ static int udf_get_block(struct inode *inode, sector_t block, err = -EIO; new = 0; - bh = NULL; iinfo = UDF_I(inode); down_write(&iinfo->i_data_sem); @@ -332,13 +329,10 @@ static int udf_get_block(struct inode *inode, sector_t block, iinfo->i_next_alloc_goal++; } - err = 0; - bh = inode_getblk(inode, block, &err, &phys, &new); - BUG_ON(bh); - if (err) + phys = inode_getblk(inode, block, &err, &new); + if (!phys) goto abort; - BUG_ON(!phys); if (new) set_buffer_new(bh_result); @@ -547,11 +541,10 @@ out: return err; } -static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, - int *err, sector_t *phys, int *new) +static sector_t inode_getblk(struct inode *inode, sector_t block, + int *err, int *new) { static sector_t last_block; - struct buffer_head *result = NULL; 
struct kernel_long_ad laarr[EXTENT_MERGE_SIZE]; struct extent_position prev_epos, cur_epos, next_epos; int count = 0, startnum = 0, endnum = 0; @@ -566,6 +559,8 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, int goal = 0, pgoal = iinfo->i_location.logicalBlockNum; int lastblock = 0; + *err = 0; + *new = 0; prev_epos.offset = udf_file_entry_alloc_offset(inode); prev_epos.block = iinfo->i_location; prev_epos.bh = NULL; @@ -635,8 +630,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, brelse(cur_epos.bh); brelse(next_epos.bh); newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset); - *phys = newblock; - return NULL; + return newblock; } last_block = block; @@ -664,7 +658,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, brelse(cur_epos.bh); brelse(next_epos.bh); *err = ret; - return NULL; + return 0; } c = 0; offset = 0; @@ -729,7 +723,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, if (!newblocknum) { brelse(prev_epos.bh); *err = -ENOSPC; - return NULL; + return 0; } iinfo->i_lenExtents += inode->i_sb->s_blocksize; } @@ -761,10 +755,10 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, newblock = udf_get_pblock(inode->i_sb, newblocknum, iinfo->i_location.partitionReferenceNum, 0); - if (!newblock) - return NULL; - *phys = newblock; - *err = 0; + if (!newblock) { + *err = -EIO; + return 0; + } *new = 1; iinfo->i_next_alloc_block = block; iinfo->i_next_alloc_goal = newblocknum; @@ -775,7 +769,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, else mark_inode_dirty(inode); - return result; + return newblock; } static void udf_split_extents(struct inode *inode, int *c, int offset, -- cgit v1.2.3 From d2eb8c359309ec45d6bf5b147303ab8e13be86ea Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 10 Dec 2011 02:30:48 +0100 Subject: udf: Fix deadlock when converting file from in-ICB one to normal one During BKL removal in 2.6.38, conversion of files from in-ICB format to normal format got broken. We call ->writepage with i_data_sem held but udf_get_block() also acquires i_data_sem thus creating A-A deadlock. We fix the problem by dropping i_data_sem before calling ->writepage() which is safe since i_mutex still protects us against any changes in the file. Also fix pagelock - i_data_sem lock inversion in udf_expand_file_adinicb() by dropping i_data_sem before calling find_or_create_page(). 
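Put differently, the ordering rule being restored is: the page lock and ->writepage() rank above i_data_sem, so i_data_sem has to be dropped before either is used, and it is i_mutex (held by the caller) that keeps the file from changing in the window. A rough sketch of the resulting shape (simplified; iinfo, page and err come from the surrounding function, the real code lives in udf_expand_file_adinicb()):

	/* entered with i_mutex and i_data_sem held for writing */
	up_write(&iinfo->i_data_sem);	/* page lock ranks above i_data_sem */
	page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
	if (!page)
		return -ENOMEM;

	down_write(&iinfo->i_data_sem);	/* copy/clear the in-ICB data under the lock */
	/* ... switch i_alloc_type and a_ops ... */
	up_write(&iinfo->i_data_sem);	/* drop it again: ->writepage() may call
					 * udf_get_block(), which takes i_data_sem */
	err = inode->i_data.a_ops->writepage(page, &udf_wbc);
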
CC: stable@kernel.org Reported-by: Matthias Matiak Tested-by: Matthias Matiak Reviewed-by: Namjae Jeon Signed-off-by: Jan Kara --- fs/udf/file.c | 6 +++--- fs/udf/inode.c | 21 ++++++++++++++++++--- 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/udf/file.c b/fs/udf/file.c index d8ffa7cc661..dca0c3881e8 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -125,7 +125,6 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, err = udf_expand_file_adinicb(inode); if (err) { udf_debug("udf_expand_adinicb: err=%d\n", err); - up_write(&iinfo->i_data_sem); return err; } } else { @@ -133,9 +132,10 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, iinfo->i_lenAlloc = pos + count; else iinfo->i_lenAlloc = inode->i_size; + up_write(&iinfo->i_data_sem); } - } - up_write(&iinfo->i_data_sem); + } else + up_write(&iinfo->i_data_sem); retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); if (retval > 0) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1bd2c42f3b4..4f7b1ffd9e3 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -150,6 +150,12 @@ const struct address_space_operations udf_aops = { .bmap = udf_bmap, }; +/* + * Expand file stored in ICB to a normal one-block-file + * + * This function requires i_data_sem for writing and releases it. + * This function requires i_mutex held + */ int udf_expand_file_adinicb(struct inode *inode) { struct page *page; @@ -168,9 +174,15 @@ int udf_expand_file_adinicb(struct inode *inode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; + up_write(&iinfo->i_data_sem); mark_inode_dirty(inode); return 0; } + /* + * Release i_data_sem so that we can lock a page - page lock ranks + * above i_data_sem. i_mutex still protects us against file changes. + */ + up_write(&iinfo->i_data_sem); page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS); if (!page) @@ -186,6 +198,7 @@ int udf_expand_file_adinicb(struct inode *inode) SetPageUptodate(page); kunmap(page); } + down_write(&iinfo->i_data_sem); memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00, iinfo->i_lenAlloc); iinfo->i_lenAlloc = 0; @@ -195,17 +208,20 @@ int udf_expand_file_adinicb(struct inode *inode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; + up_write(&iinfo->i_data_sem); err = inode->i_data.a_ops->writepage(page, &udf_wbc); if (err) { /* Restore everything back so that we don't lose data... 
*/ lock_page(page); kaddr = kmap(page); + down_write(&iinfo->i_data_sem); memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size); kunmap(page); unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; inode->i_data.a_ops = &udf_adinicb_aops; + up_write(&iinfo->i_data_sem); } page_cache_release(page); mark_inode_dirty(inode); @@ -1105,10 +1121,9 @@ int udf_setsize(struct inode *inode, loff_t newsize) if (bsize < (udf_file_entry_alloc_offset(inode) + newsize)) { err = udf_expand_file_adinicb(inode); - if (err) { - up_write(&iinfo->i_data_sem); + if (err) return err; - } + down_write(&iinfo->i_data_sem); } else iinfo->i_lenAlloc = newsize; } -- cgit v1.2.3 From fef2e9f3301934773e4f1b3cc5c7bffb119346b8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 Dec 2011 15:13:50 +0100 Subject: udf: Treat symlink component of type 2 as / MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, we ignore symlink component of type 2. But mkisofs and other OS' seem to treat it as / so do the same for compatibility. Reported-by: "Gábor S." Signed-off-by: Jan Kara --- fs/udf/symlink.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index b1d4488b0f1..d7c6dbe4194 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -41,10 +41,16 @@ static void udf_pc_to_char(struct super_block *sb, unsigned char *from, pc = (struct pathComponent *)(from + elen); switch (pc->componentType) { case 1: - if (pc->lengthComponentIdent == 0) { - p = to; - *p++ = '/'; - } + /* + * Symlink points to some place which should be agreed + * upon between originator and receiver of the media. Ignore. + */ + if (pc->lengthComponentIdent > 0) + break; + /* Fall through */ + case 2: + p = to; + *p++ = '/'; break; case 3: memcpy(p, "../", 3); -- cgit v1.2.3 From a06d789b424190e9f59da391681f908486db2554 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2011 17:35:34 +0100 Subject: reiserfs: Fix quota mount option parsing When jqfmt mount option is not specified on remount, we mistakenly clear s_jquota_fmt value stored in superblock. Fix the problem. CC: stable@kernel.org CC: reiserfs-devel@vger.kernel.org Signed-off-by: Jan Kara --- fs/reiserfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 14363b96b6a..f9eaa4a4f5f 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1164,7 +1164,8 @@ static void handle_quota_files(struct super_block *s, char **qf_names, kfree(REISERFS_SB(s)->s_qf_names[i]); REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; } - REISERFS_SB(s)->s_jquota_fmt = *qfmt; + if (*qfmt) + REISERFS_SB(s)->s_jquota_fmt = *qfmt; } #endif -- cgit v1.2.3 From a9e36da655e54545c3289b2a0700b5c443de0edd Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 21 Dec 2011 21:18:43 +0100 Subject: reiserfs: Force inode evictions before umount to avoid crash This patch fixes a crash in reiserfs_delete_xattrs during umount. When shrink_dcache_for_umount clears the dcache from generic_shutdown_super, delayed evictions are forced to disk. If an evicted inode has extended attributes associated with it, it will need to walk the xattr tree to locate and remove them. But since shrink_dcache_for_umount will BUG if it encounters active dentries, the xattr tree must be released before it's called or it will crash during every umount. 
This patch forces the evictions to occur before generic_shutdown_super by calling shrink_dcache_sb first. The additional evictions caused by the removal of each associated xattr file and dir will be automatically handled as they're added to the LRU list. CC: reiserfs-devel@vger.kernel.org CC: stable@kernel.org Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara --- fs/reiserfs/super.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f9eaa4a4f5f..5e3527be114 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -453,16 +453,20 @@ int remove_save_link(struct inode *inode, int truncate) static void reiserfs_kill_sb(struct super_block *s) { if (REISERFS_SB(s)) { - if (REISERFS_SB(s)->xattr_root) { - d_invalidate(REISERFS_SB(s)->xattr_root); - dput(REISERFS_SB(s)->xattr_root); - REISERFS_SB(s)->xattr_root = NULL; - } - if (REISERFS_SB(s)->priv_root) { - d_invalidate(REISERFS_SB(s)->priv_root); - dput(REISERFS_SB(s)->priv_root); - REISERFS_SB(s)->priv_root = NULL; - } + /* + * Force any pending inode evictions to occur now. Any + * inodes to be removed that have extended attributes + * associated with them need to clean them up before + * we can release the extended attribute root dentries. + * shrink_dcache_for_umount will BUG if we don't release + * those before it's called so ->put_super is too late. + */ + shrink_dcache_sb(s); + + dput(REISERFS_SB(s)->xattr_root); + REISERFS_SB(s)->xattr_root = NULL; + dput(REISERFS_SB(s)->priv_root); + REISERFS_SB(s)->priv_root = NULL; } kill_block_super(s); -- cgit v1.2.3 From 0048278552e9752fd578c3d8deee756987c10873 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Dec 2011 14:52:21 +0100 Subject: jbd: Remove j_barrier mutex j_barrier mutex is used for serializing different journal lock operations. The problem with it is that e.g. FIFREEZE ioctl results in process leaving kernel with j_barrier mutex held which makes lockdep freak out. Also hibernation code wants to freeze filesystem but it cannot do so because it then cannot hibernate the system because of mutex being locked. So we remove j_barrier mutex and use direct wait on j_barrier_count instead. Since locking journal is a rare operation we don't have to care about fairness or such things. CC: Andrew Morton Acked-by: Joel Becker Signed-off-by: Jan Kara --- fs/jbd/journal.c | 1 - fs/jbd/transaction.c | 38 ++++++++++++++++++++++---------------- include/linux/jbd.h | 4 ---- 3 files changed, 22 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index fea8dd661d2..1656dc2e6a0 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -721,7 +721,6 @@ static journal_t * journal_init_common (void) init_waitqueue_head(&journal->j_wait_checkpoint); init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); - mutex_init(&journal->j_barrier); mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); spin_lock_init(&journal->j_list_lock); diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 7e59c6e66f9..7fce94b04bc 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -426,17 +426,34 @@ int journal_restart(handle_t *handle, int nblocks) * void journal_lock_updates () - establish a transaction barrier. * @journal: Journal to establish a barrier on. 
* - * This locks out any further updates from being started, and blocks - * until all existing updates have completed, returning only once the - * journal is in a quiescent state with no updates running. - * - * The journal lock should not be held on entry. + * This locks out any further updates from being started, and blocks until all + * existing updates have completed, returning only once the journal is in a + * quiescent state with no updates running. + * + * We do not use simple mutex for synchronization as there are syscalls which + * want to return with filesystem locked and that trips up lockdep. Also + * hibernate needs to lock filesystem but locked mutex then blocks hibernation. + * Since locking filesystem is rare operation, we use simple counter and + * waitqueue for locking. */ void journal_lock_updates(journal_t *journal) { DEFINE_WAIT(wait); +wait: + /* Wait for previous locked operation to finish */ + wait_event(journal->j_wait_transaction_locked, + journal->j_barrier_count == 0); + spin_lock(&journal->j_state_lock); + /* + * Check reliably under the lock whether we are the ones winning the race + * and locking the journal + */ + if (journal->j_barrier_count > 0) { + spin_unlock(&journal->j_state_lock); + goto wait; + } ++journal->j_barrier_count; /* Wait until there are no running updates */ @@ -460,14 +477,6 @@ void journal_lock_updates(journal_t *journal) spin_lock(&journal->j_state_lock); } spin_unlock(&journal->j_state_lock); - - /* - * We have now established a barrier against other normal updates, but - * we also need to barrier against other journal_lock_updates() calls - * to make sure that we serialise special journal-locked operations - * too. - */ - mutex_lock(&journal->j_barrier); } /** @@ -475,14 +484,11 @@ void journal_lock_updates(journal_t *journal) * @journal: Journal to release the barrier on. * * Release a transaction barrier obtained with journal_lock_updates(). - * - * Should be called without the journal lock held. */ void journal_unlock_updates (journal_t *journal) { J_ASSERT(journal->j_barrier_count != 0); - mutex_unlock(&journal->j_barrier); spin_lock(&journal->j_state_lock); --journal->j_barrier_count; spin_unlock(&journal->j_state_lock); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 492cc12244f..d211732b9e9 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -497,7 +497,6 @@ struct transaction_s * @j_format_version: Version of the superblock format * @j_state_lock: Protect the various scalars in the journal * @j_barrier_count: Number of processes waiting to create a barrier lock - * @j_barrier: The barrier lock itself * @j_running_transaction: The current running transaction.. * @j_committing_transaction: the transaction we are pushing to disk * @j_checkpoint_transactions: a linked circular list of all transactions @@ -580,9 +579,6 @@ struct journal_s */ int j_barrier_count; - /* The barrier lock itself */ - struct mutex j_barrier; - /* * Transactions: The current running transaction... * [j_state_lock] [caller holding open handle] -- cgit v1.2.3 From 33c104d415e92a51aaf638dc3d93920cfa601e5c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Dec 2011 16:49:05 +0100 Subject: ext3: Don't warn from writepage when readonly inode is spotted after error WARN_ON_ONCE(IS_RDONLY(inode)) tends to trip when filesystem hits error and is remounted read-only. This unnecessarily scares users (well, they should be scared because of filesystem error, but the stack trace distracts them from the right source of their fear ;-). 
We could as well just remove the WARN_ON but it's not hard to fix it to not trip on filesystem with errors and not use more cycles in the common case so that's what we do. CC: stable@kernel.org Signed-off-by: Jan Kara --- fs/ext3/inode.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index a8d3217c5cf..fbf6599a1ce 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1623,7 +1623,13 @@ static int ext3_ordered_writepage(struct page *page, int err; J_ASSERT(PageLocked(page)); - WARN_ON_ONCE(IS_RDONLY(inode)); + /* + * We don't want to warn for emergency remount. The condition is + * ordered to avoid dereferencing inode->i_sb in non-error case to + * avoid slow-downs. + */ + WARN_ON_ONCE(IS_RDONLY(inode) && + !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); /* * We give up here if we're reentered, because it might be for a @@ -1698,7 +1704,13 @@ static int ext3_writeback_writepage(struct page *page, int err; J_ASSERT(PageLocked(page)); - WARN_ON_ONCE(IS_RDONLY(inode)); + /* + * We don't want to warn for emergency remount. The condition is + * ordered to avoid dereferencing inode->i_sb in non-error case to + * avoid slow-downs. + */ + WARN_ON_ONCE(IS_RDONLY(inode) && + !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); if (ext3_journal_current_handle()) goto out_fail; @@ -1741,7 +1753,13 @@ static int ext3_journalled_writepage(struct page *page, int err; J_ASSERT(PageLocked(page)); - WARN_ON_ONCE(IS_RDONLY(inode)); + /* + * We don't want to warn for emergency remount. The condition is + * ordered to avoid dereferencing inode->i_sb in non-error case to + * avoid slow-downs. + */ + WARN_ON_ONCE(IS_RDONLY(inode) && + !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); if (ext3_journal_current_handle()) goto no_write; -- cgit v1.2.3 From 853a0c25baf96b028de1654bea1e0c8857eadf3d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 23 Dec 2011 11:53:07 +0100 Subject: udf: Mark LVID buffer as uptodate before marking it dirty When we hit EIO while writing LVID, the buffer uptodate bit is cleared. This then results in an anoying warning from mark_buffer_dirty() when we write the buffer again. So just set uptodate flag unconditionally. Reviewed-by: Namjae Jeon Signed-off-by: Jan Kara --- fs/udf/super.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index e185253470d..87cb24a0ee7 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1799,6 +1799,12 @@ static void udf_close_lvid(struct super_block *sb) le16_to_cpu(lvid->descTag.descCRCLength))); lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); + /* + * We set buffer uptodate unconditionally here to avoid spurious + * warnings from mark_buffer_dirty() when previous EIO has marked + * the buffer as !uptodate + */ + set_buffer_uptodate(bh); mark_buffer_dirty(bh); sbi->s_lvid_dirty = 0; mutex_unlock(&sbi->s_alloc_mutex); -- cgit v1.2.3 From 6c2155b9cc5a193e85194bbeaae2e2e4512dd597 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Tue, 3 Jan 2012 02:31:52 +0100 Subject: ext{3,4}: Fix potential race when setversion ioctl updates inode The EXT{3,4}_IOC_SETVERSION ioctl() updates i_ctime and i_generation without i_mutex. This can lead to a race with the other operations that update i_ctime. This is not a big issue but let's make the ioctl consistent with how we handle e.g. other timestamp updates and use i_mutex to protect inode changes. 
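Condensed, the ordering the ext3 hunk below establishes (ext4 is symmetric) is i_mutex around the whole journalled update, taken before the handle is started:

	mutex_lock(&inode->i_mutex);
	handle = ext3_journal_start(inode, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto unlock_out;
	}
	/* ... bump i_ctime / i_generation, mark the iloc dirty ... */
	ext3_journal_stop(handle);
unlock_out:
	mutex_unlock(&inode->i_mutex);
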
Signed-off-by: Djalal Harouni Signed-off-by: Jan Kara --- fs/ext3/ioctl.c | 6 +++++- fs/ext4/ioctl.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index ba1b54e23ca..e7b2ed9d36c 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -134,10 +134,11 @@ flags_out: goto setversion_out; } + mutex_lock(&inode->i_mutex); handle = ext3_journal_start(inode, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); - goto setversion_out; + goto unlock_out; } err = ext3_reserve_inode_write(handle, inode, &iloc); if (err == 0) { @@ -146,6 +147,9 @@ flags_out: err = ext3_mark_iloc_dirty(handle, inode, &iloc); } ext3_journal_stop(handle); + +unlock_out: + mutex_unlock(&inode->i_mutex); setversion_out: mnt_drop_write(filp->f_path.mnt); return err; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a56796814d6..46a8de6f208 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -158,10 +158,11 @@ flags_out: goto setversion_out; } + mutex_lock(&inode->i_mutex); handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); - goto setversion_out; + goto unlock_out; } err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { @@ -170,6 +171,9 @@ flags_out: err = ext4_mark_iloc_dirty(handle, inode, &iloc); } ext4_journal_stop(handle); + +unlock_out: + mutex_unlock(&inode->i_mutex); setversion_out: mnt_drop_write(filp->f_path.mnt); return err; -- cgit v1.2.3 From 302bf2f3259948c93361d501b04a5ed69c3bd4f8 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 4 Jan 2012 15:59:47 -0500 Subject: ext2/3/4: delete unneeded includes of module.h Delete any instances of include module.h that were not strictly required. In the case of ext2, the declaration of MODULE_LICENSE etc. were in inode.c but the module_init/exit were in super.c, so relocate the MODULE_LICENCE/AUTHOR block to super.c which makes it consistent with ext3 and ext4 at the same time. 
Signed-off-by: Paul Gortmaker Signed-off-by: Jan Kara --- fs/ext2/inode.c | 5 ----- fs/ext2/super.c | 3 +++ fs/ext2/xattr.c | 1 - fs/ext2/xattr_security.c | 1 - fs/ext2/xattr_trusted.c | 1 - fs/ext2/xattr_user.c | 1 - fs/ext3/inode.c | 1 - fs/ext3/xattr_security.c | 1 - fs/ext3/xattr_trusted.c | 1 - fs/ext3/xattr_user.c | 1 - fs/ext4/block_validity.c | 1 - fs/ext4/extents.c | 1 - fs/ext4/indirect.c | 1 - fs/ext4/inode.c | 1 - fs/ext4/migrate.c | 1 - fs/ext4/page-io.c | 1 - fs/ext4/xattr_security.c | 1 - fs/ext4/xattr_trusted.c | 1 - fs/ext4/xattr_user.c | 1 - 19 files changed, 3 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 91a6945af6d..740cad8dcd8 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -36,10 +35,6 @@ #include "acl.h" #include "xip.h" -MODULE_AUTHOR("Remy Card and others"); -MODULE_DESCRIPTION("Second Extended Filesystem"); -MODULE_LICENSE("GPL"); - static int __ext2_write_inode(struct inode *inode, int do_sync); /* diff --git a/fs/ext2/super.c b/fs/ext2/super.c index bd8ac164a3b..9e7e203146b 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1521,5 +1521,8 @@ static void __exit exit_ext2_fs(void) exit_ext2_xattr(); } +MODULE_AUTHOR("Remy Card and others"); +MODULE_DESCRIPTION("Second Extended Filesystem"); +MODULE_LICENSE("GPL"); module_init(init_ext2_fs) module_exit(exit_ext2_fs) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index d27b71f1d18..6dcafc7efdf 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -54,7 +54,6 @@ */ #include -#include #include #include #include diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index c922adc8ef4..be7a8d02c9a 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -3,7 +3,6 @@ * Handler for storing security labels as extended attributes. */ -#include #include #include #include diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 667e46a8d62..2989467d359 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -5,7 +5,6 @@ * Copyright (C) 2003 by Andreas Gruenbacher, */ -#include #include #include #include diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 099d20f4716..f470e44c4b8 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -6,7 +6,6 @@ */ #include -#include #include #include "ext2.h" #include "xattr.h" diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index fbf6599a1ce..261979feb4b 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -22,7 +22,6 @@ * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 */ -#include #include #include #include diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index 3c218b8a51d..ea26f2acab9 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c @@ -3,7 +3,6 @@ * Handler for storing security labels as extended attributes. 
*/ -#include #include #include #include diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c index dc8edda9ffe..2526a8829de 100644 --- a/fs/ext3/xattr_trusted.c +++ b/fs/ext3/xattr_trusted.c @@ -5,7 +5,6 @@ * Copyright (C) 2003 by Andreas Gruenbacher, */ -#include #include #include #include diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index 7a321974d58..b32e473a1e3 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c @@ -5,7 +5,6 @@ * Copyright (C) 2001 by Andreas Gruenbacher, */ -#include #include #include #include diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 8efb2f0a344..3f11656bd72 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 61fa9e1614a..130ffe4818f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -29,7 +29,6 @@ * - smart tree reduction */ -#include #include #include #include diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 3cfc73fbca8..830e1b2bf14 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -20,7 +20,6 @@ * (sct@redhat.com), 1993, 1998 */ -#include #include "ext4_jbd2.h" #include "truncate.h" diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 240f6e2dc7e..e5385803336 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -18,7 +18,6 @@ * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 */ -#include #include #include #include diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 16ac228dbec..e7d6bb0acfa 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -12,7 +12,6 @@ * */ -#include #include #include "ext4_jbd2.h" diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 7ce1d0b19c9..b1758538b3b 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -6,7 +6,6 @@ * Written by Theodore Ts'o, 2010. */ -#include #include #include #include diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 34e4350dd4d..b60f9f81e33 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -3,7 +3,6 @@ * Handler for storing security labels as extended attributes. */ -#include #include #include #include diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 37e6ebca2cc..95f1f4ab59a 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -5,7 +5,6 @@ * Copyright (C) 2003 by Andreas Gruenbacher, */ -#include #include #include #include diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 98c375352d0..0edb7611ffb 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -5,7 +5,6 @@ * Copyright (C) 2001 by Andreas Gruenbacher, */ -#include #include #include #include "ext4_jbd2.h" -- cgit v1.2.3 From 8fdd8c49fe50394fef3e193db27222cb03c2b212 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2012 10:48:11 -0500 Subject: isofs: inode leak on mount failure d_alloc_root() failure leaves root inode leaked... 
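The contract the one-hunk fix below relies on: d_alloc_root() takes over the inode reference only on success; when it returns NULL the caller still owns the reference and must iput() it before bailing out:

	s->s_root = d_alloc_root(inode);
	if (!(s->s_root)) {
		iput(inode);	/* d_alloc_root() did not consume the reference */
		error = -ENOMEM;
		goto out_no_inode;
	}
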
Signed-off-by: Al Viro --- fs/isofs/inode.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 7b99f5f460b..bd62c76fb5d 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -948,8 +948,11 @@ root_found: /* get the root dentry */ s->s_root = d_alloc_root(inode); - if (!(s->s_root)) - goto out_no_root; + if (!(s->s_root)) { + iput(inode); + error = -ENOMEM; + goto out_no_inode; + } kfree(opt.iocharset); -- cgit v1.2.3 From 53466710202900ce49e471f480cac11275e1d0c4 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 6 Dec 2011 17:06:06 -0600 Subject: jffs2: fix up error handling for insert_inode_locked after 250df6ed274d767da844a5d9f05720b804240197 (fs: protect inode->i_state with inode->i_lock), insert_inode_locked() no longer returns the inode with I_NEW set on failure. However, the error handler still calls unlock_new_inode() on failure, which does a WARN_ON if I_NEW is not set, so any failure spews a lot of warnings. We can just drop the unlock_new_inode() if insert_inode_locked() fails here. Signed-off-by: Eric Sandeen Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- fs/jffs2/fs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 4b8afe39a87..2e0123867cb 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -466,7 +466,6 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r if (insert_inode_locked(inode) < 0) { make_bad_inode(inode); - unlock_new_inode(inode); iput(inode); return ERR_PTR(-EINVAL); } -- cgit v1.2.3 From 48d3610268cef9cea0704119a74a00d1bf82f536 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 15:44:14 +0200 Subject: logfs: rename functions starting with mtd_ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are going to re-work the MTD interface and change 'mtd->write()' to 'mtd_write()', 'mtd->read()' to 'mtd_read()' and so forth for all functions in the 'struct mtd_info' structure. However, logfs has its own 'mtd_read()', 'mtd_write()', etc functions which collide with our changes. This patch renames these logfs functions to 'logfs_mtd_read()', 'logfs_mtd_write()', etc. Additionally, to make the 'fs/logfs/dev_mtd.c' file look consistent, rename similarly all the other functions starting with 'mtd_'. 
Cc: Jörn Engel Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- fs/logfs/dev_mtd.c | 61 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index 339e17e9133..eb423ebcf53 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -13,7 +13,8 @@ #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) -static int mtd_read(struct super_block *sb, loff_t ofs, size_t len, void *buf) +static int logfs_mtd_read(struct super_block *sb, loff_t ofs, size_t len, + void *buf) { struct mtd_info *mtd = logfs_super(sb)->s_mtd; size_t retlen; @@ -31,7 +32,8 @@ static int mtd_read(struct super_block *sb, loff_t ofs, size_t len, void *buf) return 0; } -static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf) +static int loffs_mtd_write(struct super_block *sb, loff_t ofs, size_t len, + void *buf) { struct logfs_super *super = logfs_super(sb); struct mtd_info *mtd = super->s_mtd; @@ -60,14 +62,15 @@ static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf) * asynchronous properties. So just to prevent the first implementor of such * a thing from breaking logfs in 2350, we do the usual pointless dance to * declare a completion variable and wait for completion before returning - * from mtd_erase(). What an exercise in futility! + * from logfs_mtd_erase(). What an exercise in futility! */ static void logfs_erase_callback(struct erase_info *ei) { complete((struct completion *)ei->priv); } -static int mtd_erase_mapping(struct super_block *sb, loff_t ofs, size_t len) +static int logfs_mtd_erase_mapping(struct super_block *sb, loff_t ofs, + size_t len) { struct logfs_super *super = logfs_super(sb); struct address_space *mapping = super->s_mapping_inode->i_mapping; @@ -84,7 +87,7 @@ static int mtd_erase_mapping(struct super_block *sb, loff_t ofs, size_t len) return 0; } -static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len, +static int logfs_mtd_erase(struct super_block *sb, loff_t ofs, size_t len, int ensure_write) { struct mtd_info *mtd = logfs_super(sb)->s_mtd; @@ -109,10 +112,10 @@ static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len, wait_for_completion(&complete); if (ei.state != MTD_ERASE_DONE) return -EIO; - return mtd_erase_mapping(sb, ofs, len); + return logfs_mtd_erase_mapping(sb, ofs, len); } -static void mtd_sync(struct super_block *sb) +static void logfs_mtd_sync(struct super_block *sb) { struct mtd_info *mtd = logfs_super(sb)->s_mtd; @@ -120,12 +123,12 @@ static void mtd_sync(struct super_block *sb) mtd->sync(mtd); } -static int mtd_readpage(void *_sb, struct page *page) +static int logfs_mtd_readpage(void *_sb, struct page *page) { struct super_block *sb = _sb; int err; - err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE, + err = logfs_mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE, page_address(page)); if (err == -EUCLEAN || err == -EBADMSG) { /* -EBADMSG happens regularly on power failures */ @@ -143,11 +146,11 @@ static int mtd_readpage(void *_sb, struct page *page) return err; } -static struct page *mtd_find_first_sb(struct super_block *sb, u64 *ofs) +static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs) { struct logfs_super *super = logfs_super(sb); struct address_space *mapping = super->s_mapping_inode->i_mapping; - filler_t *filler = mtd_readpage; + filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; if (!mtd->block_isbad) @@ -163,11 
+166,11 @@ static struct page *mtd_find_first_sb(struct super_block *sb, u64 *ofs) return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb); } -static struct page *mtd_find_last_sb(struct super_block *sb, u64 *ofs) +static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs) { struct logfs_super *super = logfs_super(sb); struct address_space *mapping = super->s_mapping_inode->i_mapping; - filler_t *filler = mtd_readpage; + filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; if (!mtd->block_isbad) @@ -184,7 +187,7 @@ static struct page *mtd_find_last_sb(struct super_block *sb, u64 *ofs) return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb); } -static int __mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, +static int __logfs_mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, size_t nr_pages) { struct logfs_super *super = logfs_super(sb); @@ -196,8 +199,8 @@ static int __mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, page = find_lock_page(mapping, index + i); BUG_ON(!page); - err = mtd_write(sb, page->index << PAGE_SHIFT, PAGE_SIZE, - page_address(page)); + err = loffs_mtd_write(sb, page->index << PAGE_SHIFT, PAGE_SIZE, + page_address(page)); unlock_page(page); page_cache_release(page); if (err) @@ -206,7 +209,7 @@ static int __mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, return 0; } -static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len) +static void logfs_mtd_writeseg(struct super_block *sb, u64 ofs, size_t len) { struct logfs_super *super = logfs_super(sb); int head; @@ -227,15 +230,15 @@ static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len) len += head; } len = PAGE_ALIGN(len); - __mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); + __logfs_mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); } -static void mtd_put_device(struct logfs_super *s) +static void logfs_mtd_put_device(struct logfs_super *s) { put_mtd_device(s->s_mtd); } -static int mtd_can_write_buf(struct super_block *sb, u64 ofs) +static int logfs_mtd_can_write_buf(struct super_block *sb, u64 ofs) { struct logfs_super *super = logfs_super(sb); void *buf; @@ -244,7 +247,7 @@ static int mtd_can_write_buf(struct super_block *sb, u64 ofs) buf = kmalloc(super->s_writesize, GFP_KERNEL); if (!buf) return -ENOMEM; - err = mtd_read(sb, ofs, super->s_writesize, buf); + err = logfs_mtd_read(sb, ofs, super->s_writesize, buf); if (err) goto out; if (memchr_inv(buf, 0xff, super->s_writesize)) @@ -255,14 +258,14 @@ out: } static const struct logfs_device_ops mtd_devops = { - .find_first_sb = mtd_find_first_sb, - .find_last_sb = mtd_find_last_sb, - .readpage = mtd_readpage, - .writeseg = mtd_writeseg, - .erase = mtd_erase, - .can_write_buf = mtd_can_write_buf, - .sync = mtd_sync, - .put_device = mtd_put_device, + .find_first_sb = logfs_mtd_find_first_sb, + .find_last_sb = logfs_mtd_find_last_sb, + .readpage = logfs_mtd_readpage, + .writeseg = logfs_mtd_writeseg, + .erase = logfs_mtd_erase, + .can_write_buf = logfs_mtd_can_write_buf, + .sync = logfs_mtd_sync, + .put_device = logfs_mtd_put_device, }; int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr) -- cgit v1.2.3 From 7e1f0dc0551b99acb5e8fa161a7ac401994d57d8 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 15:25:39 +0200 Subject: mtd: introduce mtd_erase interface This patch is part of a patch-set which changes the MTD interface from 'mtd->func()' form to 'mtd_func()' form. 
We need this because we want to add common code to to all drivers in the mtd core level, which is impossible with the current interface when MTD clients call driver functions like 'read()' or 'write()' directly. At this point we just introduce a new inline wrapper function, but later some of them are expected to gain more code. E.g., the input parameters check should be moved to the wrappers rather than be duplicated at many drivers. This particular patch introduced the 'mtd_erase()' interface. The following patches add all the other interfaces one by one. Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/ftl.c | 2 +- drivers/mtd/inftlmount.c | 4 ++-- drivers/mtd/mtdblock.c | 2 +- drivers/mtd/mtdchar.c | 2 +- drivers/mtd/mtdconcat.c | 2 +- drivers/mtd/mtdoops.c | 2 +- drivers/mtd/mtdpart.c | 2 +- drivers/mtd/mtdswap.c | 2 +- drivers/mtd/nftlmount.c | 2 +- drivers/mtd/rfd_ftl.c | 2 +- drivers/mtd/sm_ftl.c | 2 +- drivers/mtd/tests/mtd_oobtest.c | 2 +- drivers/mtd/tests/mtd_pagetest.c | 2 +- drivers/mtd/tests/mtd_speedtest.c | 4 ++-- drivers/mtd/tests/mtd_stresstest.c | 2 +- drivers/mtd/tests/mtd_subpagetest.c | 2 +- drivers/mtd/tests/mtd_torturetest.c | 2 +- drivers/mtd/ubi/io.c | 2 +- drivers/staging/spectra/lld_mtd.c | 2 +- fs/jffs2/erase.c | 2 +- fs/logfs/dev_mtd.c | 2 +- include/linux/mtd/mtd.h | 19 ++++++++++++++----- 22 files changed, 37 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index c7382bb686c..a982889277c 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -355,7 +355,7 @@ static int erase_xfer(partition_t *part, erase->len = 1 << part->header.EraseUnitSize; erase->priv = (u_long)part; - ret = part->mbd.mtd->erase(part->mbd.mtd, erase); + ret = mtd_erase(part->mbd.mtd, erase); if (!ret) xfer->EraseCount++; diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c index 2ff601f816c..0d946f10a68 100644 --- a/drivers/mtd/inftlmount.c +++ b/drivers/mtd/inftlmount.c @@ -220,7 +220,7 @@ static int find_boot_record(struct INFTLrecord *inftl) */ instr->addr = ip->Reserved0 * inftl->EraseSize; instr->len = inftl->EraseSize; - mtd->erase(mtd, instr); + mtd_erase(mtd, instr); } if ((ip->lastUnit - ip->firstUnit + 1) < ip->virtualUnits) { printk(KERN_WARNING "INFTL: Media Header " @@ -393,7 +393,7 @@ int INFTL_formatblock(struct INFTLrecord *inftl, int block) mark only the failed block in the bbt. */ for (physblock = 0; physblock < inftl->EraseSize; physblock += instr->len, instr->addr += instr->len) { - mtd->erase(inftl->mbd.mtd, instr); + mtd_erase(inftl->mbd.mtd, instr); if (instr->state == MTD_ERASE_FAILED) { printk(KERN_WARNING "INFTL: error while formatting block %d\n", diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c index 7c1dc908a17..9b01cb0266e 100644 --- a/drivers/mtd/mtdblock.c +++ b/drivers/mtd/mtdblock.c @@ -85,7 +85,7 @@ static int erase_write (struct mtd_info *mtd, unsigned long pos, set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&wait_q, &wait); - ret = mtd->erase(mtd, &erase); + ret = mtd_erase(mtd, &erase); if (ret) { set_current_state(TASK_RUNNING); remove_wait_queue(&wait_q, &wait); diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 00423cc8580..41d64ff4c25 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -731,7 +731,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg) wq_head is no longer there when the callback routine tries to wake us up. 
*/ - ret = mtd->erase(mtd, erase); + ret = mtd_erase(mtd, erase); if (!ret) { set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&waitq, &wait); diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index 6df4d4d4eb9..76123bd4931 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -379,7 +379,7 @@ static int concat_dev_erase(struct mtd_info *mtd, struct erase_info *erase) * FIXME: Allow INTERRUPTIBLE. Which means * not having the wait_queue head on the stack. */ - err = mtd->erase(mtd, erase); + err = mtd_erase(mtd, erase); if (!err) { set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&waitq, &wait); diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index f3cdce9a85a..9b2d8632316 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -112,7 +112,7 @@ static int mtdoops_erase_block(struct mtdoops_context *cxt, int offset) set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&wait_q, &wait); - ret = mtd->erase(mtd, &erase); + ret = mtd_erase(mtd, &erase); if (ret) { set_current_state(TASK_RUNNING); remove_wait_queue(&wait_q, &wait); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index a0bd2de4752..d318fee2859 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -257,7 +257,7 @@ static int part_erase(struct mtd_info *mtd, struct erase_info *instr) if (instr->addr >= mtd->size) return -EINVAL; instr->addr += part->offset; - ret = part->master->erase(part->master, instr); + ret = mtd_erase(part->master, instr); if (ret) { if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN) instr->fail_addr -= part->offset; diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index bd9590c723e..4e12875a916 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -567,7 +567,7 @@ retry: erase.len = mtd->erasesize; erase.priv = (u_long)&wq; - ret = mtd->erase(mtd, &erase); + ret = mtd_erase(mtd, &erase); if (ret) { if (retries++ < MTDSWAP_ERASE_RETRIES) { dev_warn(d->dev, diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c index ac4092591ae..9164a56fb5c 100644 --- a/drivers/mtd/nftlmount.c +++ b/drivers/mtd/nftlmount.c @@ -326,7 +326,7 @@ int NFTL_formatblock(struct NFTLrecord *nftl, int block) instr->mtd = nftl->mbd.mtd; instr->addr = block * nftl->EraseSize; instr->len = nftl->EraseSize; - mtd->erase(mtd, instr); + mtd_erase(mtd, instr); if (instr->state == MTD_ERASE_FAILED) { printk("Error while formatting block %d\n", block); diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c index 73ae217a425..39de8727a52 100644 --- a/drivers/mtd/rfd_ftl.c +++ b/drivers/mtd/rfd_ftl.c @@ -342,7 +342,7 @@ static int erase_block(struct partition *part, int block) part->blocks[block].state = BLOCK_ERASING; part->blocks[block].free_sectors = 0; - rc = part->mbd.mtd->erase(part->mbd.mtd, erase); + rc = mtd_erase(part->mbd.mtd, erase); if (rc) { printk(KERN_ERR PREFIX "erase of region %llx,%llx on '%s' " diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c index 1c9f307ae0a..2f1acb1ab5e 100644 --- a/drivers/mtd/sm_ftl.c +++ b/drivers/mtd/sm_ftl.c @@ -479,7 +479,7 @@ static int sm_erase_block(struct sm_ftl *ftl, int zone_num, uint16_t block, return -EIO; } - if (mtd->erase(mtd, &erase)) { + if (mtd_erase(mtd, &erase)) { sm_printk("erase of block %d in zone %d failed", block, zone_num); goto error; diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c index 933f7e5f32d..7d52854c16d 100644 --- a/drivers/mtd/tests/mtd_oobtest.c +++ b/drivers/mtd/tests/mtd_oobtest.c @@ -78,7 +78,7 @@ static int 
erase_eraseblock(int ebnum) ei.addr = addr; ei.len = mtd->erasesize; - err = mtd->erase(mtd, &ei); + err = mtd_erase(mtd, &ei); if (err) { printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum); return err; diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c index afafb6935fd..271819fabb5 100644 --- a/drivers/mtd/tests/mtd_pagetest.c +++ b/drivers/mtd/tests/mtd_pagetest.c @@ -77,7 +77,7 @@ static int erase_eraseblock(int ebnum) ei.addr = addr; ei.len = mtd->erasesize; - err = mtd->erase(mtd, &ei); + err = mtd_erase(mtd, &ei); if (err) { printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum); return err; diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c index 493b367bdd3..f67a65e2104 100644 --- a/drivers/mtd/tests/mtd_speedtest.c +++ b/drivers/mtd/tests/mtd_speedtest.c @@ -79,7 +79,7 @@ static int erase_eraseblock(int ebnum) ei.addr = addr; ei.len = mtd->erasesize; - err = mtd->erase(mtd, &ei); + err = mtd_erase(mtd, &ei); if (err) { printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum); return err; @@ -105,7 +105,7 @@ static int multiblock_erase(int ebnum, int blocks) ei.addr = addr; ei.len = mtd->erasesize * blocks; - err = mtd->erase(mtd, &ei); + err = mtd_erase(mtd, &ei); if (err) { printk(PRINT_PREF "error %d while erasing EB %d, blocks %d\n", err, ebnum, blocks); diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c index 811642fea6b..a204a9f9052 100644 --- a/drivers/mtd/tests/mtd_stresstest.c +++ b/drivers/mtd/tests/mtd_stresstest.c @@ -112,7 +112,7 @@ static int erase_eraseblock(int ebnum) ei.addr = addr; ei.len = mtd->erasesize; - err = mtd->erase(mtd, &ei); + err = mtd_erase(mtd, &ei); if (unlikely(err)) { printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum); return err; diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c index 1a05bfac4ee..16d0c05024d 100644 --- a/drivers/mtd/tests/mtd_subpagetest.c +++ b/drivers/mtd/tests/mtd_subpagetest.c @@ -80,7 +80,7 @@ static int erase_eraseblock(int ebnum) ei.addr = addr; ei.len = mtd->erasesize; - err = mtd->erase(mtd, &ei); + err = mtd_erase(mtd, &ei); if (err) { printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum); return err; diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c index 03ab649a696..102c79b7ac6 100644 --- a/drivers/mtd/tests/mtd_torturetest.c +++ b/drivers/mtd/tests/mtd_torturetest.c @@ -105,7 +105,7 @@ static inline int erase_eraseblock(int ebnum) ei.addr = addr; ei.len = mtd->erasesize; - err = mtd->erase(mtd, &ei); + err = mtd_erase(mtd, &ei); if (err) { printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum); return err; diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index f20b6f22f24..b6c8959e6c7 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -361,7 +361,7 @@ retry: ei.callback = erase_callback; ei.priv = (unsigned long)&wq; - err = ubi->mtd->erase(ubi->mtd, &ei); + err = mtd_erase(ubi->mtd, &ei); if (err) { if (retries++ < UBI_IO_RETRIES) { dbg_io("error %d while erasing PEB %d, retry", diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c index a9c309a167c..d638fafab64 100644 --- a/drivers/staging/spectra/lld_mtd.c +++ b/drivers/staging/spectra/lld_mtd.c @@ -188,7 +188,7 @@ u16 mtd_Erase_Block(u32 block_add) erase.len = spectra_mtd->erasesize; erase.priv = (unsigned long)&comp; - ret = spectra_mtd->erase(spectra_mtd, &erase); + ret = 
mtd_erase(spectra_mtd, &erase); if (!ret) { wait_for_completion(&comp); if (erase.state != MTD_ERASE_DONE) diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index e513f1913c1..540e8eca1b4 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -74,7 +74,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c, ((struct erase_priv_struct *)instr->priv)->jeb = jeb; ((struct erase_priv_struct *)instr->priv)->c = c; - ret = c->mtd->erase(c->mtd, instr); + ret = mtd_erase(c->mtd, instr); if (!ret) return; diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index eb423ebcf53..04636289435 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -105,7 +105,7 @@ static int logfs_mtd_erase(struct super_block *sb, loff_t ofs, size_t len, ei.len = len; ei.callback = logfs_erase_callback; ei.priv = (long)&complete; - ret = mtd->erase(mtd, &ei); + ret = mtd_erase(mtd, &ei); if (ret) return -EIO; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 9f5b312af78..201bad55704 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -171,11 +171,8 @@ struct mtd_info { struct mtd_erase_region_info *eraseregions; /* - * Erase is an asynchronous operation. Device drivers are supposed - * to call instr->callback() whenever the operation completes, even - * if it completes with a failure. - * Callers are supposed to pass a callback function and wait for it - * to be called before writing to the block. + * Do not call via these pointers, use corresponding mtd_*() + * wrappers instead. */ int (*erase) (struct mtd_info *mtd, struct erase_info *instr); @@ -274,6 +271,18 @@ struct mtd_info { void (*put_device) (struct mtd_info *mtd); }; +/* + * Erase is an asynchronous operation. Device drivers are supposed + * to call instr->callback() whenever the operation completes, even + * if it completes with a failure. + * Callers are supposed to pass a callback function and wait for it + * to be called before writing to the block. + */ +static inline int mtd_erase(struct mtd_info *mtd, struct erase_info *instr) +{ + return mtd->erase(mtd, instr); +} + static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ? 
dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From d35ea200c0fb5315f16fb2599a4bafd9c1a7b386 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 17:00:37 +0200 Subject: mtd: introduce mtd_point interface Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdpart.c | 4 ++-- fs/jffs2/erase.c | 4 ++-- fs/jffs2/readinode.c | 4 ++-- fs/jffs2/scan.c | 4 ++-- include/linux/mtd/mtd.h | 14 ++++++++++---- 5 files changed, 18 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index d318fee2859..5b664722e5b 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -89,8 +89,8 @@ static int part_point(struct mtd_info *mtd, loff_t from, size_t len, len = 0; else if (from + len > mtd->size) len = mtd->size - from; - return part->master->point (part->master, from + part->offset, - len, retlen, virt, phys); + return mtd_point(part->master, from + part->offset, len, retlen, + virt, phys); } static void part_unpoint(struct mtd_info *mtd, loff_t from, size_t len) diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index 540e8eca1b4..53f8794fda6 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -340,8 +340,8 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl if (c->mtd->point) { unsigned long *wordebuf; - ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, - &retlen, &ebuf, NULL); + ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen, + &ebuf, NULL); if (ret) { D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); goto do_flash_read; diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index ee57bac1ba6..dde61effeda 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -63,8 +63,8 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(), * adding and jffs2_flash_read_end() interface. */ if (c->mtd->point) { - err = c->mtd->point(c->mtd, ofs, len, &retlen, - (void **)&buffer, NULL); + err = mtd_point(c->mtd, ofs, len, &retlen, (void **)&buffer, + NULL); if (!err && retlen < len) { JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); c->mtd->unpoint(c->mtd, ofs, retlen); diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 28107ca136e..53e05c8e5b6 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -97,8 +97,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) size_t pointlen, try_size; if (c->mtd->point) { - ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen, - (void **)&flashbuf, NULL); + ret = mtd_point(c->mtd, 0, c->mtd->size, &pointlen, + (void **)&flashbuf, NULL); if (!ret && pointlen < c->mtd->size) { /* Don't muck about if it won't let us point to the whole flash */ D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 201bad55704..ca7bfdaf7a6 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -175,11 +175,8 @@ struct mtd_info { * wrappers instead. 
*/ int (*erase) (struct mtd_info *mtd, struct erase_info *instr); - - /* This stuff for eXecute-In-Place */ - /* phys is optional and may be set to NULL */ int (*point) (struct mtd_info *mtd, loff_t from, size_t len, - size_t *retlen, void **virt, resource_size_t *phys); + size_t *retlen, void **virt, resource_size_t *phys); /* We probably shouldn't allow XIP if the unpoint isn't a NULL */ void (*unpoint) (struct mtd_info *mtd, loff_t from, size_t len); @@ -283,6 +280,15 @@ static inline int mtd_erase(struct mtd_info *mtd, struct erase_info *instr) return mtd->erase(mtd, instr); } +/* + * This stuff for eXecute-In-Place. phys is optional and may be set to NULL. + */ +static inline int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, void **virt, resource_size_t *phys) +{ + return mtd->point(mtd, from, len, retlen, virt, phys); +} + static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ? dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From 7219778ad9c18cc2c05c7fca0abe026afbc19dfb Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 17:05:52 +0200 Subject: mtd: introduce mtd_unpoint interface Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdpart.c | 2 +- fs/jffs2/erase.c | 4 ++-- fs/jffs2/readinode.c | 6 +++--- fs/jffs2/scan.c | 4 ++-- include/linux/mtd/mtd.h | 8 ++++++-- 5 files changed, 14 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 5b664722e5b..b09624a5497 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -97,7 +97,7 @@ static void part_unpoint(struct mtd_info *mtd, loff_t from, size_t len) { struct mtd_part *part = PART(mtd); - part->master->unpoint(part->master, from + part->offset, len); + mtd_unpoint(part->master, from + part->offset, len); } static unsigned long part_get_unmapped_area(struct mtd_info *mtd, diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index 53f8794fda6..ffdf4fca9c5 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -349,7 +349,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl if (retlen < c->sector_size) { /* Don't muck about if it won't let us point to the whole erase sector */ D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen)); - c->mtd->unpoint(c->mtd, jeb->offset, retlen); + mtd_unpoint(c->mtd, jeb->offset, retlen); goto do_flash_read; } wordebuf = ebuf-sizeof(*wordebuf); @@ -358,7 +358,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl if (*++wordebuf != ~0) break; } while(--retlen); - c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size); + mtd_unpoint(c->mtd, jeb->offset, c->sector_size); if (retlen) { printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n", *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf)); diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index dde61effeda..fca2f84e1ad 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -67,7 +67,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info NULL); if (!err && retlen < len) { JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); - c->mtd->unpoint(c->mtd, ofs, retlen); + mtd_unpoint(c->mtd, ofs, retlen); } else if (err) JFFS2_WARNING("MTD point failed: error code %d.\n", err); else @@ -101,7 +101,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info kfree(buffer); 
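The JFFS2 hunks above show the intended pairing: try mtd_point(), and give back exactly the length that was successfully mapped with mtd_unpoint() before falling back to a plain read. Condensed into one hypothetical helper (the name and structure are illustrative; the fallback still uses the raw ->read pointer because mtd_read() only appears later in this series):

#include <linux/mtd/mtd.h>

/* Sketch: prefer a direct (XIP-style) mapping, otherwise copy into a buffer. */
static int read_mapped_or_copy(struct mtd_info *mtd, loff_t ofs, size_t len,
			       u_char *copybuf)
{
	size_t retlen;
	void *virt;
	int err;

	if (mtd->point) {
		err = mtd_point(mtd, ofs, len, &retlen, &virt, NULL);
		if (!err && retlen == len) {
			/* ... use 'virt' in place ... */
			mtd_unpoint(mtd, ofs, len);
			return 0;
		}
		if (!err)
			mtd_unpoint(mtd, ofs, retlen);	/* short map: give it back */
	}
	/* No full mapping available: fall back to an ordinary copy. */
	err = mtd->read(mtd, ofs, len, &retlen, copybuf);
	if (!err && retlen != len)
		err = -EIO;
	return err;
}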
#ifndef __ECOS else - c->mtd->unpoint(c->mtd, ofs, len); + mtd_unpoint(c->mtd, ofs, len); #endif if (crc != tn->data_crc) { @@ -137,7 +137,7 @@ free_out: kfree(buffer); #ifndef __ECOS else - c->mtd->unpoint(c->mtd, ofs, len); + mtd_unpoint(c->mtd, ofs, len); #endif return err; } diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 53e05c8e5b6..72f3960f44a 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -102,7 +102,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) if (!ret && pointlen < c->mtd->size) { /* Don't muck about if it won't let us point to the whole flash */ D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); - c->mtd->unpoint(c->mtd, 0, pointlen); + mtd_unpoint(c->mtd, 0, pointlen); flashbuf = NULL; } if (ret) @@ -273,7 +273,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) kfree(flashbuf); #ifndef __ECOS else - c->mtd->unpoint(c->mtd, 0, c->mtd->size); + mtd_unpoint(c->mtd, 0, c->mtd->size); #endif kfree(s); return ret; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index ca7bfdaf7a6..a7d22b7fcb4 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -177,8 +177,6 @@ struct mtd_info { int (*erase) (struct mtd_info *mtd, struct erase_info *instr); int (*point) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, void **virt, resource_size_t *phys); - - /* We probably shouldn't allow XIP if the unpoint isn't a NULL */ void (*unpoint) (struct mtd_info *mtd, loff_t from, size_t len); /* Allow NOMMU mmap() to directly map the device (if not NULL) @@ -289,6 +287,12 @@ static inline int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, return mtd->point(mtd, from, len, retlen, virt, phys); } +/* We probably shouldn't allow XIP if the unpoint isn't a NULL */ +static inline void mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len) +{ + return mtd->unpoint(mtd, from, len); +} + static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ? 
dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From 04c601bfa4cb29c968dcb66e44c799c9c01d8675 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 17:10:15 +0200 Subject: mtd: introduce mtd_get_unmapped_area interface Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdchar.c | 2 +- drivers/mtd/mtdconcat.c | 4 ++-- drivers/mtd/mtdpart.c | 3 +-- fs/romfs/mmap-nommu.c | 2 +- include/linux/mtd/mtd.h | 18 +++++++++++++----- 5 files changed, 18 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 41d64ff4c25..c51f04a00af 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1135,7 +1135,7 @@ static unsigned long mtdchar_get_unmapped_area(struct file *file, if (offset > mtd->size - len) return (unsigned long) -EINVAL; - return mtd->get_unmapped_area(mtd, len, offset, flags); + return mtd_get_unmapped_area(mtd, len, offset, flags); } /* can't map directly */ diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index 76123bd4931..b3895cf20bb 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -726,8 +726,8 @@ static unsigned long concat_get_unmapped_area(struct mtd_info *mtd, return (unsigned long) -EINVAL; if (subdev->get_unmapped_area) - return subdev->get_unmapped_area(subdev, len, offset, - flags); + return mtd_get_unmapped_area(subdev, len, offset, + flags); break; } diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index b09624a5497..55a9cb544fc 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -108,8 +108,7 @@ static unsigned long part_get_unmapped_area(struct mtd_info *mtd, struct mtd_part *part = PART(mtd); offset += part->offset; - return part->master->get_unmapped_area(part->master, len, offset, - flags); + return mtd_get_unmapped_area(part->master, len, offset, flags); } static int part_read_oob(struct mtd_info *mtd, loff_t from, diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c index eed99428f10..d5168e8e7dc 100644 --- a/fs/romfs/mmap-nommu.c +++ b/fs/romfs/mmap-nommu.c @@ -53,7 +53,7 @@ static unsigned long romfs_get_unmapped_area(struct file *file, if (offset > mtd->size - len) return (unsigned long) -EINVAL; - return mtd->get_unmapped_area(mtd, len, offset, flags); + return mtd_get_unmapped_area(mtd, len, offset, flags); } cant_map_directly: diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index a7d22b7fcb4..f38e8276b40 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -178,11 +178,6 @@ struct mtd_info { int (*point) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, void **virt, resource_size_t *phys); void (*unpoint) (struct mtd_info *mtd, loff_t from, size_t len); - - /* Allow NOMMU mmap() to directly map the device (if not NULL) - * - return the address to which the offset maps - * - return -ENOSYS to indicate refusal to do the mapping - */ unsigned long (*get_unmapped_area) (struct mtd_info *mtd, unsigned long len, unsigned long offset, @@ -293,6 +288,19 @@ static inline void mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len) return mtd->unpoint(mtd, from, len); } +/* + * Allow NOMMU mmap() to directly map the device (if not NULL) + * - return the address to which the offset maps + * - return -ENOSYS to indicate refusal to do the mapping + */ +static inline unsigned long mtd_get_unmapped_area(struct mtd_info *mtd, + unsigned long len, + unsigned long offset, + unsigned long flags) +{ + return mtd->get_unmapped_area(mtd, len, offset, flags); +} + 
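Callers such as mtdchar and romfs remain responsible for checking that the driver implements the method and that the request fits the device; the wrapper only forwards the call. Roughly, as an illustrative sketch rather than a hunk from this patch:

#include <linux/errno.h>
#include <linux/mtd/mtd.h>

/* Sketch of a NOMMU mmap() helper built on the new wrapper. */
static unsigned long map_mtd_for_nommu(struct mtd_info *mtd, unsigned long len,
				       unsigned long offset, unsigned long flags)
{
	if (!mtd->get_unmapped_area)
		return (unsigned long) -ENOSYS;	/* can't map directly */
	if (offset > mtd->size - len)
		return (unsigned long) -EINVAL;
	return mtd_get_unmapped_area(mtd, len, offset, flags);
}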
static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ? dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From 329ad399a9b3adf52c90637b21ca029fcf7f8795 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 17:30:16 +0200 Subject: mtd: introduce mtd_read interface Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- arch/arm/mach-davinci/board-da850-evm.c | 2 +- arch/cris/arch-v32/drivers/axisflashmap.c | 4 +-- drivers/mtd/afs.c | 4 +-- drivers/mtd/ar7part.c | 15 ++++++----- drivers/mtd/bcm63xxpart.c | 12 ++++----- drivers/mtd/ftl.c | 41 ++++++++++++++++--------------- drivers/mtd/inftlcore.c | 19 ++++++++------ drivers/mtd/inftlmount.c | 10 ++++---- drivers/mtd/mtdblock.c | 8 +++--- drivers/mtd/mtdblock_ro.c | 2 +- drivers/mtd/mtdchar.c | 2 +- drivers/mtd/mtdconcat.c | 2 +- drivers/mtd/mtdoops.c | 4 +-- drivers/mtd/mtdpart.c | 3 +-- drivers/mtd/mtdswap.c | 4 +-- drivers/mtd/nand/diskonchip.c | 4 +-- drivers/mtd/nand/nand_bbt.c | 6 ++--- drivers/mtd/nftlcore.c | 17 ++++++++----- drivers/mtd/nftlmount.c | 6 ++--- drivers/mtd/redboot.c | 4 +-- drivers/mtd/rfd_ftl.c | 24 +++++++++--------- drivers/mtd/ssfdc.c | 6 ++--- drivers/mtd/tests/mtd_pagetest.c | 28 ++++++++++----------- drivers/mtd/tests/mtd_readtest.c | 2 +- drivers/mtd/tests/mtd_speedtest.c | 8 +++--- drivers/mtd/tests/mtd_stresstest.c | 2 +- drivers/mtd/tests/mtd_subpagetest.c | 8 +++--- drivers/mtd/tests/mtd_torturetest.c | 2 +- drivers/mtd/ubi/debug.c | 2 +- drivers/mtd/ubi/io.c | 6 ++--- drivers/staging/spectra/lld_mtd.c | 8 +++--- fs/jffs2/erase.c | 2 +- fs/jffs2/wbuf.c | 9 ++++--- fs/logfs/dev_mtd.c | 2 +- include/linux/mtd/mtd.h | 9 ++++++- 35 files changed, 152 insertions(+), 135 deletions(-) (limited to 'fs') diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 6659a90dbca..8b079f9d692 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -127,7 +127,7 @@ static void da850_evm_m25p80_notify_add(struct mtd_info *mtd) size_t retlen; if (!strcmp(mtd->name, "MAC-Address")) { - mtd->read(mtd, 0, ETH_ALEN, &retlen, mac_addr); + mtd_read(mtd, 0, ETH_ALEN, &retlen, mac_addr); if (retlen == ETH_ALEN) pr_info("Read MAC addr from SPI Flash: %pM\n", mac_addr); diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c index a2bde374462..011bddbf073 100644 --- a/arch/cris/arch-v32/drivers/axisflashmap.c +++ b/arch/cris/arch-v32/drivers/axisflashmap.c @@ -413,8 +413,8 @@ static int __init init_axis_flash(void) } while (blockstat && ptable_sector); #endif if (ptable_sector) { - main_mtd->read(main_mtd, ptable_sector, PAGESIZE, - &len, page); + mtd_read(main_mtd, ptable_sector, PAGESIZE, &len, + page); ptable_head = &((struct partitiontable *) page)->head; } diff --git a/drivers/mtd/afs.c b/drivers/mtd/afs.c index 89a02f6f65d..5a3942bf109 100644 --- a/drivers/mtd/afs.c +++ b/drivers/mtd/afs.c @@ -75,7 +75,7 @@ afs_read_footer(struct mtd_info *mtd, u_int *img_start, u_int *iis_start, size_t sz; int ret; - ret = mtd->read(mtd, ptr, sizeof(fs), &sz, (u_char *) &fs); + ret = mtd_read(mtd, ptr, sizeof(fs), &sz, (u_char *)&fs); if (ret >= 0 && sz != sizeof(fs)) ret = -EINVAL; @@ -132,7 +132,7 @@ afs_read_iis(struct mtd_info *mtd, struct image_info_struct *iis, u_int ptr) int ret, i; memset(iis, 0, sizeof(*iis)); - ret = mtd->read(mtd, ptr, sizeof(*iis), &sz, (u_char *) iis); + ret = mtd_read(mtd, ptr, sizeof(*iis), &sz, (u_char *)iis); if (ret < 0) goto failed; diff 
--git a/drivers/mtd/ar7part.c b/drivers/mtd/ar7part.c index f40ea454755..94539312995 100644 --- a/drivers/mtd/ar7part.c +++ b/drivers/mtd/ar7part.c @@ -73,8 +73,8 @@ static int create_mtd_partitions(struct mtd_info *master, do { /* Try 10 blocks starting from master->erasesize */ offset = pre_size; - master->read(master, offset, - sizeof(header), &len, (uint8_t *)&header); + mtd_read(master, offset, sizeof(header), &len, + (uint8_t *)&header); if (!strncmp((char *)&header, "TIENV0.8", 8)) ar7_parts[1].offset = pre_size; if (header.checksum == LOADER_MAGIC1) @@ -95,16 +95,16 @@ static int create_mtd_partitions(struct mtd_info *master, case LOADER_MAGIC1: while (header.length) { offset += sizeof(header) + header.length; - master->read(master, offset, sizeof(header), - &len, (uint8_t *)&header); + mtd_read(master, offset, sizeof(header), &len, + (uint8_t *)&header); } root_offset = offset + sizeof(header) + 4; break; case LOADER_MAGIC2: while (header.length) { offset += sizeof(header) + header.length; - master->read(master, offset, sizeof(header), - &len, (uint8_t *)&header); + mtd_read(master, offset, sizeof(header), &len, + (uint8_t *)&header); } root_offset = offset + sizeof(header) + 4 + 0xff; root_offset &= ~(uint32_t)0xff; @@ -114,8 +114,7 @@ static int create_mtd_partitions(struct mtd_info *master, break; } - master->read(master, root_offset, - sizeof(header), &len, (u8 *)&header); + mtd_read(master, root_offset, sizeof(header), &len, (u8 *)&header); if (header.checksum != SQUASHFS_MAGIC) { root_offset += master->erasesize - 1; root_offset &= ~(master->erasesize - 1); diff --git a/drivers/mtd/bcm63xxpart.c b/drivers/mtd/bcm63xxpart.c index 9ee8bc426e9..608321ee056 100644 --- a/drivers/mtd/bcm63xxpart.c +++ b/drivers/mtd/bcm63xxpart.c @@ -48,8 +48,8 @@ static int bcm63xx_detect_cfe(struct mtd_info *master) int ret; size_t retlen; - ret = master->read(master, BCM963XX_CFE_VERSION_OFFSET, 5, &retlen, - (void *)buf); + ret = mtd_read(master, BCM963XX_CFE_VERSION_OFFSET, 5, &retlen, + (void *)buf); buf[retlen] = 0; if (ret) @@ -59,8 +59,8 @@ static int bcm63xx_detect_cfe(struct mtd_info *master) return 0; /* very old CFE's do not have the cfe-v string, so check for magic */ - ret = master->read(master, BCM63XX_CFE_MAGIC_OFFSET, 8, &retlen, - (void *)buf); + ret = mtd_read(master, BCM63XX_CFE_MAGIC_OFFSET, 8, &retlen, + (void *)buf); buf[retlen] = 0; return strncmp("CFE1CFE1", buf, 8); @@ -95,8 +95,8 @@ static int bcm63xx_parse_cfe_partitions(struct mtd_info *master, return -ENOMEM; /* Get the tag */ - ret = master->read(master, cfelen, sizeof(struct bcm_tag), &retlen, - (void *)buf); + ret = mtd_read(master, cfelen, sizeof(struct bcm_tag), &retlen, + (void *)buf); if (retlen != sizeof(struct bcm_tag)) { vfree(buf); diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index a982889277c..12fd7ebd3fd 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -168,8 +168,8 @@ static int scan_header(partition_t *part) (offset + sizeof(header)) < max_offset; offset += part->mbd.mtd->erasesize ? 
: 0x2000) { - err = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(header), &ret, - (unsigned char *)&header); + err = mtd_read(part->mbd.mtd, offset, sizeof(header), &ret, + (unsigned char *)&header); if (err) return err; @@ -224,8 +224,8 @@ static int build_maps(partition_t *part) for (i = 0; i < le16_to_cpu(part->header.NumEraseUnits); i++) { offset = ((i + le16_to_cpu(part->header.FirstPhysicalEUN)) << part->header.EraseUnitSize); - ret = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(header), &retval, - (unsigned char *)&header); + ret = mtd_read(part->mbd.mtd, offset, sizeof(header), &retval, + (unsigned char *)&header); if (ret) goto out_XferInfo; @@ -289,9 +289,9 @@ static int build_maps(partition_t *part) part->EUNInfo[i].Deleted = 0; offset = part->EUNInfo[i].Offset + le32_to_cpu(header.BAMOffset); - ret = part->mbd.mtd->read(part->mbd.mtd, offset, - part->BlocksPerUnit * sizeof(uint32_t), &retval, - (unsigned char *)part->bam_cache); + ret = mtd_read(part->mbd.mtd, offset, + part->BlocksPerUnit * sizeof(uint32_t), &retval, + (unsigned char *)part->bam_cache); if (ret) goto out_bam_cache; @@ -485,9 +485,9 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit, offset = eun->Offset + le32_to_cpu(part->header.BAMOffset); - ret = part->mbd.mtd->read(part->mbd.mtd, offset, - part->BlocksPerUnit * sizeof(uint32_t), - &retlen, (u_char *) (part->bam_cache)); + ret = mtd_read(part->mbd.mtd, offset, + part->BlocksPerUnit * sizeof(uint32_t), &retlen, + (u_char *)(part->bam_cache)); /* mark the cache bad, in case we get an error later */ part->bam_index = 0xffff; @@ -523,8 +523,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit, break; case BLOCK_DATA: case BLOCK_REPLACEMENT: - ret = part->mbd.mtd->read(part->mbd.mtd, src, SECTOR_SIZE, - &retlen, (u_char *) buf); + ret = mtd_read(part->mbd.mtd, src, SECTOR_SIZE, &retlen, + (u_char *)buf); if (ret) { printk(KERN_WARNING "ftl: Error reading old xfer unit in copy_erase_unit\n"); return ret; @@ -747,10 +747,11 @@ static uint32_t find_free(partition_t *part) /* Invalidate cache */ part->bam_index = 0xffff; - ret = part->mbd.mtd->read(part->mbd.mtd, - part->EUNInfo[eun].Offset + le32_to_cpu(part->header.BAMOffset), - part->BlocksPerUnit * sizeof(uint32_t), - &retlen, (u_char *) (part->bam_cache)); + ret = mtd_read(part->mbd.mtd, + part->EUNInfo[eun].Offset + le32_to_cpu(part->header.BAMOffset), + part->BlocksPerUnit * sizeof(uint32_t), + &retlen, + (u_char *)(part->bam_cache)); if (ret) { printk(KERN_WARNING"ftl: Error reading BAM in find_free\n"); @@ -810,8 +811,8 @@ static int ftl_read(partition_t *part, caddr_t buffer, else { offset = (part->EUNInfo[log_addr / bsize].Offset + (log_addr % bsize)); - ret = part->mbd.mtd->read(part->mbd.mtd, offset, SECTOR_SIZE, - &retlen, (u_char *) buffer); + ret = mtd_read(part->mbd.mtd, offset, SECTOR_SIZE, &retlen, + (u_char *)buffer); if (ret) { printk(KERN_WARNING "Error reading MTD device in ftl_read()\n"); @@ -849,8 +850,8 @@ static int set_bam_entry(partition_t *part, uint32_t log_addr, le32_to_cpu(part->header.BAMOffset)); #ifdef PSYCHO_DEBUG - ret = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(uint32_t), - &retlen, (u_char *)&old_addr); + ret = mtd_read(part->mbd.mtd, offset, sizeof(uint32_t), &retlen, + (u_char *)&old_addr); if (ret) { printk(KERN_WARNING"ftl: Error reading old_addr in set_bam_entry: %d\n",ret); return ret; diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c index dd034efd187..0b038bed7b9 100644 --- a/drivers/mtd/inftlcore.c +++ 
b/drivers/mtd/inftlcore.c @@ -343,14 +343,17 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned if (BlockMap[block] == BLOCK_NIL) continue; - ret = mtd->read(mtd, (inftl->EraseSize * BlockMap[block]) + - (block * SECTORSIZE), SECTORSIZE, &retlen, - movebuf); + ret = mtd_read(mtd, + (inftl->EraseSize * BlockMap[block]) + (block * SECTORSIZE), + SECTORSIZE, + &retlen, + movebuf); if (ret < 0 && !mtd_is_bitflip(ret)) { - ret = mtd->read(mtd, - (inftl->EraseSize * BlockMap[block]) + - (block * SECTORSIZE), SECTORSIZE, - &retlen, movebuf); + ret = mtd_read(mtd, + (inftl->EraseSize * BlockMap[block]) + (block * SECTORSIZE), + SECTORSIZE, + &retlen, + movebuf); if (ret != -EIO) pr_debug("INFTL: error went away on retry?\n"); } @@ -914,7 +917,7 @@ foundit: } else { size_t retlen; loff_t ptr = (thisEUN * inftl->EraseSize) + blockofs; - int ret = mtd->read(mtd, ptr, SECTORSIZE, &retlen, buffer); + int ret = mtd_read(mtd, ptr, SECTORSIZE, &retlen, buffer); /* Handle corrected bit flips gracefully */ if (ret < 0 && !mtd_is_bitflip(ret)) diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c index 0d946f10a68..9bfbca5d88d 100644 --- a/drivers/mtd/inftlmount.c +++ b/drivers/mtd/inftlmount.c @@ -73,8 +73,8 @@ static int find_boot_record(struct INFTLrecord *inftl) * Check for BNAND header first. Then whinge if it's found * but later checks fail. */ - ret = mtd->read(mtd, block * inftl->EraseSize, - SECTORSIZE, &retlen, buf); + ret = mtd_read(mtd, block * inftl->EraseSize, SECTORSIZE, + &retlen, buf); /* We ignore ret in case the ECC of the MediaHeader is invalid (which is apparently acceptable) */ if (retlen != SECTORSIZE) { @@ -118,8 +118,8 @@ static int find_boot_record(struct INFTLrecord *inftl) memcpy(mh, buf, sizeof(struct INFTLMediaHeader)); /* Read the spare media header at offset 4096 */ - mtd->read(mtd, block * inftl->EraseSize + 4096, - SECTORSIZE, &retlen, buf); + mtd_read(mtd, block * inftl->EraseSize + 4096, SECTORSIZE, + &retlen, buf); if (retlen != SECTORSIZE) { printk(KERN_WARNING "INFTL: Unable to read spare " "Media Header\n"); @@ -342,7 +342,7 @@ static int check_free_sectors(struct INFTLrecord *inftl, unsigned int address, int i; for (i = 0; i < len; i += SECTORSIZE) { - if (mtd->read(mtd, address, SECTORSIZE, &retlen, buf)) + if (mtd_read(mtd, address, SECTORSIZE, &retlen, buf)) return -1; if (memcmpb(buf, 0xff, SECTORSIZE) != 0) return -1; diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c index 9b01cb0266e..b0644d2d2a6 100644 --- a/drivers/mtd/mtdblock.c +++ b/drivers/mtd/mtdblock.c @@ -184,8 +184,8 @@ static int do_cached_write (struct mtdblk_dev *mtdblk, unsigned long pos, mtdblk->cache_offset != sect_start) { /* fill the cache with the current sector */ mtdblk->cache_state = STATE_EMPTY; - ret = mtd->read(mtd, sect_start, sect_size, - &retlen, mtdblk->cache_data); + ret = mtd_read(mtd, sect_start, sect_size, + &retlen, mtdblk->cache_data); if (ret) return ret; if (retlen != sect_size) @@ -222,7 +222,7 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos, mtd->name, pos, len); if (!sect_size) - return mtd->read(mtd, pos, len, &retlen, buf); + return mtd_read(mtd, pos, len, &retlen, buf); while (len > 0) { unsigned long sect_start = (pos/sect_size)*sect_size; @@ -241,7 +241,7 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos, mtdblk->cache_offset == sect_start) { memcpy (buf, mtdblk->cache_data + offset, size); } else { - ret = mtd->read(mtd, pos, size, &retlen, buf); + ret = 
mtd_read(mtd, pos, size, &retlen, buf); if (ret) return ret; if (retlen != size) diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c index 0470a6e8630..f5737b1153f 100644 --- a/drivers/mtd/mtdblock_ro.c +++ b/drivers/mtd/mtdblock_ro.c @@ -30,7 +30,7 @@ static int mtdblock_readsect(struct mtd_blktrans_dev *dev, { size_t retlen; - if (dev->mtd->read(dev->mtd, (block * 512), 512, &retlen, buf)) + if (mtd_read(dev->mtd, (block * 512), 512, &retlen, buf)) return 1; return 0; } diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index c51f04a00af..c7f484687fa 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -232,7 +232,7 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count, break; } default: - ret = mtd->read(mtd, *ppos, len, &retlen, kbuf); + ret = mtd_read(mtd, *ppos, len, &retlen, kbuf); } /* Nand returns -EBADMSG on ECC errors, but it returns * the data. For our userspace tools it is important diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index b3895cf20bb..45460349fd1 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -91,7 +91,7 @@ concat_read(struct mtd_info *mtd, loff_t from, size_t len, /* Entire transaction goes into this subdev */ size = len; - err = subdev->read(subdev, from, size, &retsize, buf); + err = mtd_read(subdev, from, size, &retsize, buf); /* Save information about bitflips! */ if (unlikely(err)) { diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index 9b2d8632316..23629ad0850 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -258,8 +258,8 @@ static void find_next_position(struct mtdoops_context *cxt) continue; /* Assume the page is used */ mark_page_used(cxt, page); - ret = mtd->read(mtd, page * record_size, MTDOOPS_HEADER_SIZE, - &retlen, (u_char *) &count[0]); + ret = mtd_read(mtd, page * record_size, MTDOOPS_HEADER_SIZE, + &retlen, (u_char *)&count[0]); if (retlen != MTDOOPS_HEADER_SIZE || (ret < 0 && !mtd_is_bitflip(ret))) { printk(KERN_ERR "mtdoops: read failure at %ld (%td of %d read), err %d\n", diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 55a9cb544fc..59cd7974bc5 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -70,8 +70,7 @@ static int part_read(struct mtd_info *mtd, loff_t from, size_t len, len = 0; else if (from + len > mtd->size) len = mtd->size - from; - res = part->master->read(part->master, from + part->offset, - len, retlen, buf); + res = mtd_read(part->master, from + part->offset, len, retlen, buf); if (unlikely(res)) { if (mtd_is_bitflip(res)) mtd->ecc_stats.corrected += part->master->ecc_stats.corrected - stats.corrected; diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 4e12875a916..b3282d2aa8f 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -736,7 +736,7 @@ static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock, retries = 0; retry: - ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf); + ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf); if (ret < 0 && !mtd_is_bitflip(ret)) { oldeb = d->eb_data + oldblock / d->pages_per_eblk; @@ -1161,7 +1161,7 @@ static int mtdswap_readsect(struct mtd_blktrans_dev *dev, retries = 0; retry: - ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, buf); + ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf); d->mtd_read_count++; if (mtd_is_bitflip(ret)) { diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c index 5780dbab611..df921e7a496 100644 --- 
a/drivers/mtd/nand/diskonchip.c +++ b/drivers/mtd/nand/diskonchip.c @@ -1072,7 +1072,7 @@ static int __init find_media_headers(struct mtd_info *mtd, u_char *buf, const ch size_t retlen; for (offs = 0; offs < mtd->size; offs += mtd->erasesize) { - ret = mtd->read(mtd, offs, mtd->writesize, &retlen, buf); + ret = mtd_read(mtd, offs, mtd->writesize, &retlen, buf); if (retlen != mtd->writesize) continue; if (ret) { @@ -1097,7 +1097,7 @@ static int __init find_media_headers(struct mtd_info *mtd, u_char *buf, const ch /* Only one mediaheader was found. We want buf to contain a mediaheader on return, so we'll have to re-read the one we found. */ offs = doc->mh0_page << this->page_shift; - ret = mtd->read(mtd, offs, mtd->writesize, &retlen, buf); + ret = mtd_read(mtd, offs, mtd->writesize, &retlen, buf); if (retlen != mtd->writesize) { /* Insanity. Give up. */ printk(KERN_ERR "Read DiskOnChip Media Header once, but can't reread it???\n"); diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c index 69148ae3bf5..1bcd6bc6798 100644 --- a/drivers/mtd/nand/nand_bbt.c +++ b/drivers/mtd/nand/nand_bbt.c @@ -201,7 +201,7 @@ static int read_bbt(struct mtd_info *mtd, uint8_t *buf, int page, int num, from += marker_len; marker_len = 0; } - res = mtd->read(mtd, from, len, &retlen, buf); + res = mtd_read(mtd, from, len, &retlen, buf); if (res < 0) { if (mtd_is_eccerr(res)) { pr_info("nand_bbt: ECC error in BBT at " @@ -298,7 +298,7 @@ static int scan_read_raw_data(struct mtd_info *mtd, uint8_t *buf, loff_t offs, if (td->options & NAND_BBT_VERSION) len++; - return mtd->read(mtd, offs, len, &retlen, buf); + return mtd_read(mtd, offs, len, &retlen, buf); } /* Scan read raw data from flash */ @@ -756,7 +756,7 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf, /* Make it block aligned */ to &= ~((loff_t)((1 << this->bbt_erase_shift) - 1)); len = 1 << this->bbt_erase_shift; - res = mtd->read(mtd, to, len, &retlen, buf); + res = mtd_read(mtd, to, len, &retlen, buf); if (res < 0) { if (retlen != len) { pr_info("nand_bbt: error reading block " diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c index cda77b562ad..1a9d9c1d3a7 100644 --- a/drivers/mtd/nftlcore.c +++ b/drivers/mtd/nftlcore.c @@ -423,12 +423,17 @@ static u16 NFTL_foldchain (struct NFTLrecord *nftl, unsigned thisVUC, unsigned p if (BlockMap[block] == BLOCK_NIL) continue; - ret = mtd->read(mtd, (nftl->EraseSize * BlockMap[block]) + (block * 512), - 512, &retlen, movebuf); + ret = mtd_read(mtd, + (nftl->EraseSize * BlockMap[block]) + (block * 512), + 512, + &retlen, + movebuf); if (ret < 0 && !mtd_is_bitflip(ret)) { - ret = mtd->read(mtd, (nftl->EraseSize * BlockMap[block]) - + (block * 512), 512, &retlen, - movebuf); + ret = mtd_read(mtd, + (nftl->EraseSize * BlockMap[block]) + (block * 512), + 512, + &retlen, + movebuf); if (ret != -EIO) printk("Error went away on retry.\n"); } @@ -771,7 +776,7 @@ static int nftl_readblock(struct mtd_blktrans_dev *mbd, unsigned long block, } else { loff_t ptr = (lastgoodEUN * nftl->EraseSize) + blockofs; size_t retlen; - int res = mtd->read(mtd, ptr, 512, &retlen, buffer); + int res = mtd_read(mtd, ptr, 512, &retlen, buffer); if (res < 0 && !mtd_is_bitflip(res)) return -EIO; diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c index 9164a56fb5c..b068dc8a366 100644 --- a/drivers/mtd/nftlmount.c +++ b/drivers/mtd/nftlmount.c @@ -63,8 +63,8 @@ static int find_boot_record(struct NFTLrecord *nftl) /* Check for ANAND header first. 
Then can whinge if it's found but later checks fail */ - ret = mtd->read(mtd, block * nftl->EraseSize, SECTORSIZE, - &retlen, buf); + ret = mtd_read(mtd, block * nftl->EraseSize, SECTORSIZE, + &retlen, buf); /* We ignore ret in case the ECC of the MediaHeader is invalid (which is apparently acceptable) */ if (retlen != SECTORSIZE) { @@ -274,7 +274,7 @@ static int check_free_sectors(struct NFTLrecord *nftl, unsigned int address, int int i; for (i = 0; i < len; i += SECTORSIZE) { - if (mtd->read(mtd, address, SECTORSIZE, &retlen, buf)) + if (mtd_read(mtd, address, SECTORSIZE, &retlen, buf)) return -1; if (memcmpb(buf, 0xff, SECTORSIZE) != 0) return -1; diff --git a/drivers/mtd/redboot.c b/drivers/mtd/redboot.c index e366b1d84ea..623d9b86d0d 100644 --- a/drivers/mtd/redboot.c +++ b/drivers/mtd/redboot.c @@ -104,8 +104,8 @@ static int parse_redboot_partitions(struct mtd_info *master, printk(KERN_NOTICE "Searching for RedBoot partition table in %s at offset 0x%lx\n", master->name, offset); - ret = master->read(master, offset, - master->erasesize, &retlen, (void *)buf); + ret = mtd_read(master, offset, master->erasesize, &retlen, + (void *)buf); if (ret) goto out; diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c index 39de8727a52..d9fe2d0533d 100644 --- a/drivers/mtd/rfd_ftl.c +++ b/drivers/mtd/rfd_ftl.c @@ -200,9 +200,9 @@ static int scan_header(struct partition *part) part->sector_map[i] = -1; for (i=0, blocks_found=0; i<part->total_blocks; i++) { - rc = part->mbd.mtd->read(part->mbd.mtd, - i * part->block_size, part->header_size, - &retlen, (u_char*)part->header_cache); + rc = mtd_read(part->mbd.mtd, i * part->block_size, + part->header_size, &retlen, + (u_char *)part->header_cache); if (!rc && retlen != part->header_size) rc = -EIO; @@ -250,8 +250,8 @@ static int rfd_ftl_readsect(struct mtd_blktrans_dev *dev, u_long sector, char *b addr = part->sector_map[sector]; if (addr != -1) { - rc = part->mbd.mtd->read(part->mbd.mtd, addr, SECTOR_SIZE, - &retlen, (u_char*)buf); + rc = mtd_read(part->mbd.mtd, addr, SECTOR_SIZE, &retlen, + (u_char *)buf); if (!rc && retlen != SECTOR_SIZE) rc = -EIO; @@ -372,9 +372,8 @@ static int move_block_contents(struct partition *part, int block_no, u_long *old if (!map) goto err2; - rc = part->mbd.mtd->read(part->mbd.mtd, - part->blocks[block_no].offset, part->header_size, - &retlen, (u_char*)map); + rc = mtd_read(part->mbd.mtd, part->blocks[block_no].offset, + part->header_size, &retlen, (u_char *)map); if (!rc && retlen != part->header_size) rc = -EIO; @@ -413,8 +412,8 @@ static int move_block_contents(struct partition *part, int block_no, u_long *old } continue; } - rc = part->mbd.mtd->read(part->mbd.mtd, addr, - SECTOR_SIZE, &retlen, sector_data); + rc = mtd_read(part->mbd.mtd, addr, SECTOR_SIZE, &retlen, + sector_data); if (!rc && retlen != SECTOR_SIZE) rc = -EIO; @@ -563,8 +562,9 @@ static int find_writable_block(struct partition *part, u_long *old_sector) } } - rc = part->mbd.mtd->read(part->mbd.mtd, part->blocks[block].offset, - part->header_size, &retlen, (u_char*)part->header_cache); + rc = mtd_read(part->mbd.mtd, part->blocks[block].offset, + part->header_size, &retlen, + (u_char *)part->header_cache); if (!rc && retlen != part->header_size) rc = -EIO; diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c index 976e3d28b96..293e22a5710 100644 --- a/drivers/mtd/ssfdc.c +++ b/drivers/mtd/ssfdc.c @@ -123,8 +123,8 @@ static int get_valid_cis_sector(struct mtd_info *mtd) */ for (k = 0, offset = 0; k < 4; k++, offset += mtd->erasesize) { if 
(!mtd->block_isbad(mtd, offset)) { - ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen, - sect_buf); + ret = mtd_read(mtd, offset, SECTOR_SIZE, &retlen, + sect_buf); /* CIS pattern match on the sector buffer */ if (ret < 0 || retlen != SECTOR_SIZE) { @@ -156,7 +156,7 @@ static int read_physical_sector(struct mtd_info *mtd, uint8_t *sect_buf, size_t retlen; loff_t offset = (loff_t)sect_no << SECTOR_SHIFT; - ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen, sect_buf); + ret = mtd_read(mtd, offset, SECTOR_SIZE, &retlen, sect_buf); if (ret < 0 || retlen != SECTOR_SIZE) return -1; diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c index 271819fabb5..6d62e24a03e 100644 --- a/drivers/mtd/tests/mtd_pagetest.c +++ b/drivers/mtd/tests/mtd_pagetest.c @@ -127,7 +127,7 @@ static int verify_eraseblock(int ebnum) set_random_data(writebuf, mtd->erasesize); for (j = 0; j < pgcnt - 1; ++j, addr += pgsize) { /* Do a read to set the internal dataRAMs to different data */ - err = mtd->read(mtd, addr0, bufsize, &read, twopages); + err = mtd_read(mtd, addr0, bufsize, &read, twopages); if (mtd_is_bitflip(err)) err = 0; if (err || read != bufsize) { @@ -135,7 +135,7 @@ static int verify_eraseblock(int ebnum) (long long)addr0); return err; } - err = mtd->read(mtd, addrn - bufsize, bufsize, &read, twopages); + err = mtd_read(mtd, addrn - bufsize, bufsize, &read, twopages); if (mtd_is_bitflip(err)) err = 0; if (err || read != bufsize) { @@ -145,7 +145,7 @@ static int verify_eraseblock(int ebnum) } memset(twopages, 0, bufsize); read = 0; - err = mtd->read(mtd, addr, bufsize, &read, twopages); + err = mtd_read(mtd, addr, bufsize, &read, twopages); if (mtd_is_bitflip(err)) err = 0; if (err || read != bufsize) { @@ -163,7 +163,7 @@ static int verify_eraseblock(int ebnum) if (addr <= addrn - pgsize - pgsize && !bbt[ebnum + 1]) { unsigned long oldnext = next; /* Do a read to set the internal dataRAMs to different data */ - err = mtd->read(mtd, addr0, bufsize, &read, twopages); + err = mtd_read(mtd, addr0, bufsize, &read, twopages); if (mtd_is_bitflip(err)) err = 0; if (err || read != bufsize) { @@ -171,7 +171,7 @@ static int verify_eraseblock(int ebnum) (long long)addr0); return err; } - err = mtd->read(mtd, addrn - bufsize, bufsize, &read, twopages); + err = mtd_read(mtd, addrn - bufsize, bufsize, &read, twopages); if (mtd_is_bitflip(err)) err = 0; if (err || read != bufsize) { @@ -181,7 +181,7 @@ static int verify_eraseblock(int ebnum) } memset(twopages, 0, bufsize); read = 0; - err = mtd->read(mtd, addr, bufsize, &read, twopages); + err = mtd_read(mtd, addr, bufsize, &read, twopages); if (mtd_is_bitflip(err)) err = 0; if (err || read != bufsize) { @@ -230,7 +230,7 @@ static int crosstest(void) /* Read 2nd-to-last page to pp1 */ read = 0; addr = addrn - pgsize - pgsize; - err = mtd->read(mtd, addr, pgsize, &read, pp1); + err = mtd_read(mtd, addr, pgsize, &read, pp1); if (mtd_is_bitflip(err)) err = 0; if (err || read != pgsize) { @@ -243,7 +243,7 @@ static int crosstest(void) /* Read 3rd-to-last page to pp1 */ read = 0; addr = addrn - pgsize - pgsize - pgsize; - err = mtd->read(mtd, addr, pgsize, &read, pp1); + err = mtd_read(mtd, addr, pgsize, &read, pp1); if (mtd_is_bitflip(err)) err = 0; if (err || read != pgsize) { @@ -257,7 +257,7 @@ static int crosstest(void) read = 0; addr = addr0; printk(PRINT_PREF "reading page at %#llx\n", (long long)addr); - err = mtd->read(mtd, addr, pgsize, &read, pp2); + err = mtd_read(mtd, addr, pgsize, &read, pp2); if (mtd_is_bitflip(err)) err = 0; if (err || 
read != pgsize) { @@ -271,7 +271,7 @@ static int crosstest(void) read = 0; addr = addrn - pgsize; printk(PRINT_PREF "reading page at %#llx\n", (long long)addr); - err = mtd->read(mtd, addr, pgsize, &read, pp3); + err = mtd_read(mtd, addr, pgsize, &read, pp3); if (mtd_is_bitflip(err)) err = 0; if (err || read != pgsize) { @@ -285,7 +285,7 @@ static int crosstest(void) read = 0; addr = addr0; printk(PRINT_PREF "reading page at %#llx\n", (long long)addr); - err = mtd->read(mtd, addr, pgsize, &read, pp4); + err = mtd_read(mtd, addr, pgsize, &read, pp4); if (mtd_is_bitflip(err)) err = 0; if (err || read != pgsize) { @@ -344,7 +344,7 @@ static int erasecrosstest(void) printk(PRINT_PREF "reading 1st page of block %d\n", ebnum); memset(readbuf, 0, pgsize); - err = mtd->read(mtd, addr0, pgsize, &read, readbuf); + err = mtd_read(mtd, addr0, pgsize, &read, readbuf); if (mtd_is_bitflip(err)) err = 0; if (err || read != pgsize) { @@ -382,7 +382,7 @@ static int erasecrosstest(void) printk(PRINT_PREF "reading 1st page of block %d\n", ebnum); memset(readbuf, 0, pgsize); - err = mtd->read(mtd, addr0, pgsize, &read, readbuf); + err = mtd_read(mtd, addr0, pgsize, &read, readbuf); if (mtd_is_bitflip(err)) err = 0; if (err || read != pgsize) { @@ -438,7 +438,7 @@ static int erasetest(void) return err; printk(PRINT_PREF "reading 1st page of block %d\n", ebnum); - err = mtd->read(mtd, addr0, pgsize, &read, twopages); + err = mtd_read(mtd, addr0, pgsize, &read, twopages); if (mtd_is_bitflip(err)) err = 0; if (err || read != pgsize) { diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c index 550fe51225a..0c58d2976c7 100644 --- a/drivers/mtd/tests/mtd_readtest.c +++ b/drivers/mtd/tests/mtd_readtest.c @@ -52,7 +52,7 @@ static int read_eraseblock_by_page(int ebnum) for (i = 0; i < pgcnt; i++) { memset(buf, 0 , pgcnt); - ret = mtd->read(mtd, addr, pgsize, &read, buf); + ret = mtd_read(mtd, addr, pgsize, &read, buf); if (ret == -EUCLEAN) ret = 0; if (ret || read != pgsize) { diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c index f67a65e2104..3c9529bd0a6 100644 --- a/drivers/mtd/tests/mtd_speedtest.c +++ b/drivers/mtd/tests/mtd_speedtest.c @@ -214,7 +214,7 @@ static int read_eraseblock(int ebnum) int err = 0; loff_t addr = ebnum * mtd->erasesize; - err = mtd->read(mtd, addr, mtd->erasesize, &read, iobuf); + err = mtd_read(mtd, addr, mtd->erasesize, &read, iobuf); /* Ignore corrected ECC errors */ if (mtd_is_bitflip(err)) err = 0; @@ -235,7 +235,7 @@ static int read_eraseblock_by_page(int ebnum) void *buf = iobuf; for (i = 0; i < pgcnt; i++) { - err = mtd->read(mtd, addr, pgsize, &read, buf); + err = mtd_read(mtd, addr, pgsize, &read, buf); /* Ignore corrected ECC errors */ if (mtd_is_bitflip(err)) err = 0; @@ -261,7 +261,7 @@ static int read_eraseblock_by_2pages(int ebnum) void *buf = iobuf; for (i = 0; i < n; i++) { - err = mtd->read(mtd, addr, sz, &read, buf); + err = mtd_read(mtd, addr, sz, &read, buf); /* Ignore corrected ECC errors */ if (mtd_is_bitflip(err)) err = 0; @@ -276,7 +276,7 @@ static int read_eraseblock_by_2pages(int ebnum) buf += sz; } if (pgcnt % 2) { - err = mtd->read(mtd, addr, pgsize, &read, buf); + err = mtd_read(mtd, addr, pgsize, &read, buf); /* Ignore corrected ECC errors */ if (mtd_is_bitflip(err)) err = 0; diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c index a204a9f9052..83a84372388 100644 --- a/drivers/mtd/tests/mtd_stresstest.c +++ b/drivers/mtd/tests/mtd_stresstest.c @@ -153,7 +153,7 @@ 
static int do_read(void) len = mtd->erasesize - offs; } addr = eb * mtd->erasesize + offs; - err = mtd->read(mtd, addr, len, &read, readbuf); + err = mtd_read(mtd, addr, len, &read, readbuf); if (mtd_is_bitflip(err)) err = 0; if (unlikely(err || read != len)) { diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c index 16d0c05024d..d81f89a19da 100644 --- a/drivers/mtd/tests/mtd_subpagetest.c +++ b/drivers/mtd/tests/mtd_subpagetest.c @@ -196,7 +196,7 @@ static int verify_eraseblock(int ebnum) set_random_data(writebuf, subpgsize); clear_data(readbuf, subpgsize); read = 0; - err = mtd->read(mtd, addr, subpgsize, &read, readbuf); + err = mtd_read(mtd, addr, subpgsize, &read, readbuf); if (unlikely(err || read != subpgsize)) { if (mtd_is_bitflip(err) && read == subpgsize) { printk(PRINT_PREF "ECC correction at %#llx\n", @@ -224,7 +224,7 @@ static int verify_eraseblock(int ebnum) set_random_data(writebuf, subpgsize); clear_data(readbuf, subpgsize); read = 0; - err = mtd->read(mtd, addr, subpgsize, &read, readbuf); + err = mtd_read(mtd, addr, subpgsize, &read, readbuf); if (unlikely(err || read != subpgsize)) { if (mtd_is_bitflip(err) && read == subpgsize) { printk(PRINT_PREF "ECC correction at %#llx\n", @@ -262,7 +262,7 @@ static int verify_eraseblock2(int ebnum) set_random_data(writebuf, subpgsize * k); clear_data(readbuf, subpgsize * k); read = 0; - err = mtd->read(mtd, addr, subpgsize * k, &read, readbuf); + err = mtd_read(mtd, addr, subpgsize * k, &read, readbuf); if (unlikely(err || read != subpgsize * k)) { if (mtd_is_bitflip(err) && read == subpgsize * k) { printk(PRINT_PREF "ECC correction at %#llx\n", @@ -296,7 +296,7 @@ static int verify_eraseblock_ff(int ebnum) for (j = 0; j < mtd->erasesize / subpgsize; ++j) { clear_data(readbuf, subpgsize); read = 0; - err = mtd->read(mtd, addr, subpgsize, &read, readbuf); + err = mtd_read(mtd, addr, subpgsize, &read, readbuf); if (unlikely(err || read != subpgsize)) { if (mtd_is_bitflip(err) && read == subpgsize) { printk(PRINT_PREF "ECC correction at %#llx\n", diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c index 102c79b7ac6..ecc68bf3f3f 100644 --- a/drivers/mtd/tests/mtd_torturetest.c +++ b/drivers/mtd/tests/mtd_torturetest.c @@ -137,7 +137,7 @@ static inline int check_eraseblock(int ebnum, unsigned char *buf) } retry: - err = mtd->read(mtd, addr, len, &read, check_buf); + err = mtd_read(mtd, addr, len, &read, check_buf); if (mtd_is_bitflip(err)) printk(PRINT_PREF "single bit flip occurred at EB %d " "MTD reported that it was fixed.\n", ebnum); diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index ab80c0debac..e2cdebf4084 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -216,7 +216,7 @@ void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len) buf = vmalloc(len); if (!buf) return; - err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); + err = mtd_read(ubi->mtd, addr, len, &read, buf); if (err && err != -EUCLEAN) { ubi_err("error %d while reading %d bytes from PEB %d:%d, " "read %zd bytes", err, len, pnum, offset, read); diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index b6c8959e6c7..433382951d3 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -170,7 +170,7 @@ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, addr = (loff_t)pnum * ubi->peb_size + offset; retry: - err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); + err = mtd_read(ubi->mtd, addr, len, &read, 
buf); if (err) { const char *errstr = mtd_is_eccerr(err) ? " (ECC error)" : ""; @@ -1357,7 +1357,7 @@ int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, return 0; } - err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf1); + err = mtd_read(ubi->mtd, addr, len, &read, buf1); if (err && !mtd_is_bitflip(err)) goto out_free; @@ -1421,7 +1421,7 @@ int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) return 0; } - err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); + err = mtd_read(ubi->mtd, addr, len, &read, buf); if (err && !mtd_is_bitflip(err)) { ubi_err("error %d while reading %d bytes from PEB %d:%d, " "read %zd bytes", err, len, pnum, offset, read); diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c index d638fafab64..eccd08d0e00 100644 --- a/drivers/staging/spectra/lld_mtd.c +++ b/drivers/staging/spectra/lld_mtd.c @@ -283,9 +283,11 @@ u16 mtd_Read_Page_Main(u8 *read_data, u32 Block, while (PageCount) { - ret = spectra_mtd->read(spectra_mtd, - (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize), - DeviceInfo.wPageDataSize, &retlen, read_data); + ret = mtd_read(spectra_mtd, + (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize), + DeviceInfo.wPageDataSize, + &retlen, + read_data); if (ret) { printk(KERN_ERR "%s failed %d\n", __func__, ret); return FAIL; diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index ffdf4fca9c5..c59d642cade 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -381,7 +381,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl *bad_offset = ofs; - ret = c->mtd->read(c->mtd, ofs, readlen, &retlen, ebuf); + ret = mtd_read(c->mtd, ofs, readlen, &retlen, ebuf); if (ret) { printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret); ret = -EIO; diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index b09e51d2f81..a24d3d21b63 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -228,7 +228,7 @@ static int jffs2_verify_write(struct jffs2_sb_info *c, unsigned char *buf, size_t retlen; char *eccstr; - ret = c->mtd->read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify); + ret = mtd_read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify); if (ret && ret != -EUCLEAN && ret != -EBADMSG) { printk(KERN_WARNING "jffs2_verify_write(): Read back of page at %08x failed: %d\n", c->wbuf_ofs, ret); return ret; @@ -337,7 +337,8 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) } /* Do the read... */ - ret = c->mtd->read(c->mtd, start, c->wbuf_ofs - start, &retlen, buf); + ret = mtd_read(c->mtd, start, c->wbuf_ofs - start, &retlen, + buf); /* ECC recovered ? 
*/ if ((ret == -EUCLEAN || ret == -EBADMSG) && @@ -948,11 +949,11 @@ int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *re int ret; if (!jffs2_is_writebuffered(c)) - return c->mtd->read(c->mtd, ofs, len, retlen, buf); + return mtd_read(c->mtd, ofs, len, retlen, buf); /* Read flash */ down_read(&c->wbuf_sem); - ret = c->mtd->read(c->mtd, ofs, len, retlen, buf); + ret = mtd_read(c->mtd, ofs, len, retlen, buf); if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) { if (ret == -EBADMSG) diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index 04636289435..3ee64351685 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -20,7 +20,7 @@ static int logfs_mtd_read(struct super_block *sb, loff_t ofs, size_t len, size_t retlen; int ret; - ret = mtd->read(mtd, ofs, len, &retlen, buf); + ret = mtd_read(mtd, ofs, len, &retlen, buf); BUG_ON(ret == -EINVAL); if (ret) return ret; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index f38e8276b40..56478eb4bbc 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -182,6 +182,8 @@ struct mtd_info { unsigned long len, unsigned long offset, unsigned long flags); + int (*read) (struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, u_char *buf); /* Backing device capabilities for this device * - provides mmap capabilities @@ -189,7 +191,6 @@ struct mtd_info { struct backing_dev_info *backing_dev_info; - int (*read) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); int (*write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf); /* In blackbox flight recorder like scenarios we want to make successful @@ -301,6 +302,12 @@ static inline unsigned long mtd_get_unmapped_area(struct mtd_info *mtd, return mtd->get_unmapped_area(mtd, len, offset, flags); } +static inline int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, u_char *buf) +{ + return mtd->read(mtd, from, len, retlen, buf); +} + static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ? 
dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From eda95cbf75193808f62948fb0142ba0901d8bee2 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 17:35:41 +0200 Subject: mtd: introduce mtd_write interface Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/chips/cfi_cmdset_0020.c | 8 +++++--- drivers/mtd/ftl.c | 35 ++++++++++++++++++----------------- drivers/mtd/mtdblock.c | 4 ++-- drivers/mtd/mtdblock_ro.c | 2 +- drivers/mtd/mtdchar.c | 2 +- drivers/mtd/mtdconcat.c | 2 +- drivers/mtd/mtdcore.c | 3 ++- drivers/mtd/mtdoops.c | 4 ++-- drivers/mtd/mtdpart.c | 3 +-- drivers/mtd/mtdswap.c | 2 +- drivers/mtd/rfd_ftl.c | 17 ++++++++--------- drivers/mtd/tests/mtd_pagetest.c | 8 ++++---- drivers/mtd/tests/mtd_speedtest.c | 8 ++++---- drivers/mtd/tests/mtd_stresstest.c | 2 +- drivers/mtd/tests/mtd_subpagetest.c | 6 +++--- drivers/mtd/tests/mtd_torturetest.c | 2 +- drivers/mtd/ubi/io.c | 7 +++---- drivers/staging/spectra/lld_mtd.c | 8 +++++--- fs/jffs2/wbuf.c | 18 +++++++++--------- fs/jffs2/writev.c | 5 +++-- fs/logfs/dev_mtd.c | 2 +- include/linux/mtd/mtd.h | 9 ++++++++- 22 files changed, 84 insertions(+), 73 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c index 666c52f8bf8..85e80180b65 100644 --- a/drivers/mtd/chips/cfi_cmdset_0020.c +++ b/drivers/mtd/chips/cfi_cmdset_0020.c @@ -699,7 +699,8 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs, continue; } memcpy(buffer+buflen, elem_base, ECCBUF_SIZE-buflen); - ret = mtd->write(mtd, to, ECCBUF_SIZE, &thislen, buffer); + ret = mtd_write(mtd, to, ECCBUF_SIZE, &thislen, + buffer); totlen += thislen; if (ret || thislen != ECCBUF_SIZE) goto write_error; @@ -708,7 +709,8 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs, to += ECCBUF_SIZE; } if (ECCBUF_DIV(elem_len)) { /* write clean aligned data */ - ret = mtd->write(mtd, to, ECCBUF_DIV(elem_len), &thislen, elem_base); + ret = mtd_write(mtd, to, ECCBUF_DIV(elem_len), + &thislen, elem_base); totlen += thislen; if (ret || thislen != ECCBUF_DIV(elem_len)) goto write_error; @@ -722,7 +724,7 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs, } if (buflen) { /* flush last page, even if not full */ /* This is sometimes intended behaviour, really */ - ret = mtd->write(mtd, to, buflen, &thislen, buffer); + ret = mtd_write(mtd, to, buflen, &thislen, buffer); totlen += thislen; if (ret || thislen != ECCBUF_SIZE) goto write_error; diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index 12fd7ebd3fd..d591b1d0a6c 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -422,8 +422,8 @@ static int prepare_xfer(partition_t *part, int i) header.LogicalEUN = cpu_to_le16(0xffff); header.EraseCount = cpu_to_le32(xfer->EraseCount); - ret = part->mbd.mtd->write(part->mbd.mtd, xfer->Offset, sizeof(header), - &retlen, (u_char *)&header); + ret = mtd_write(part->mbd.mtd, xfer->Offset, sizeof(header), &retlen, + (u_char *)&header); if (ret) { return ret; @@ -438,8 +438,8 @@ static int prepare_xfer(partition_t *part, int i) for (i = 0; i < nbam; i++, offset += sizeof(uint32_t)) { - ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint32_t), - &retlen, (u_char *)&ctl); + ret = mtd_write(part->mbd.mtd, offset, sizeof(uint32_t), &retlen, + (u_char *)&ctl); if (ret) return ret; @@ -503,8 +503,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit, offset = xfer->Offset + 20; /* Bad! 
*/ unit = cpu_to_le16(0x7fff); - ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint16_t), - &retlen, (u_char *) &unit); + ret = mtd_write(part->mbd.mtd, offset, sizeof(uint16_t), &retlen, + (u_char *)&unit); if (ret) { printk( KERN_WARNING "ftl: Failed to write back to BAM cache in copy_erase_unit()!\n"); @@ -531,8 +531,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit, } - ret = part->mbd.mtd->write(part->mbd.mtd, dest, SECTOR_SIZE, - &retlen, (u_char *) buf); + ret = mtd_write(part->mbd.mtd, dest, SECTOR_SIZE, &retlen, + (u_char *)buf); if (ret) { printk(KERN_WARNING "ftl: Error writing new xfer unit in copy_erase_unit\n"); return ret; @@ -550,9 +550,11 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit, } /* Write the BAM to the transfer unit */ - ret = part->mbd.mtd->write(part->mbd.mtd, xfer->Offset + le32_to_cpu(part->header.BAMOffset), - part->BlocksPerUnit * sizeof(int32_t), &retlen, - (u_char *)part->bam_cache); + ret = mtd_write(part->mbd.mtd, + xfer->Offset + le32_to_cpu(part->header.BAMOffset), + part->BlocksPerUnit * sizeof(int32_t), + &retlen, + (u_char *)part->bam_cache); if (ret) { printk( KERN_WARNING "ftl: Error writing BAM in copy_erase_unit\n"); return ret; @@ -560,8 +562,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit, /* All clear? Then update the LogicalEUN again */ - ret = part->mbd.mtd->write(part->mbd.mtd, xfer->Offset + 20, sizeof(uint16_t), - &retlen, (u_char *)&srcunitswap); + ret = mtd_write(part->mbd.mtd, xfer->Offset + 20, sizeof(uint16_t), + &retlen, (u_char *)&srcunitswap); if (ret) { printk(KERN_WARNING "ftl: Error writing new LogicalEUN in copy_erase_unit\n"); @@ -887,8 +889,8 @@ static int set_bam_entry(partition_t *part, uint32_t log_addr, #endif part->bam_cache[blk] = le_virt_addr; } - ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint32_t), - &retlen, (u_char *)&le_virt_addr); + ret = mtd_write(part->mbd.mtd, offset, sizeof(uint32_t), &retlen, + (u_char *)&le_virt_addr); if (ret) { printk(KERN_NOTICE "ftl_cs: set_bam_entry() failed!\n"); @@ -947,8 +949,7 @@ static int ftl_write(partition_t *part, caddr_t buffer, part->EUNInfo[part->bam_index].Deleted++; offset = (part->EUNInfo[part->bam_index].Offset + blk * SECTOR_SIZE); - ret = part->mbd.mtd->write(part->mbd.mtd, offset, SECTOR_SIZE, &retlen, - buffer); + ret = mtd_write(part->mbd.mtd, offset, SECTOR_SIZE, &retlen, buffer); if (ret) { printk(KERN_NOTICE "ftl_cs: block write failed!\n"); diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c index b0644d2d2a6..ac7f1f1faa2 100644 --- a/drivers/mtd/mtdblock.c +++ b/drivers/mtd/mtdblock.c @@ -102,7 +102,7 @@ static int erase_write (struct mtd_info *mtd, unsigned long pos, * Next, write the data to flash. 
*/ - ret = mtd->write(mtd, pos, len, &retlen, buf); + ret = mtd_write(mtd, pos, len, &retlen, buf); if (ret) return ret; if (retlen != len) @@ -152,7 +152,7 @@ static int do_cached_write (struct mtdblk_dev *mtdblk, unsigned long pos, mtd->name, pos, len); if (!sect_size) - return mtd->write(mtd, pos, len, &retlen, buf); + return mtd_write(mtd, pos, len, &retlen, buf); while (len > 0) { unsigned long sect_start = (pos/sect_size)*sect_size; diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c index f5737b1153f..92759a9d298 100644 --- a/drivers/mtd/mtdblock_ro.c +++ b/drivers/mtd/mtdblock_ro.c @@ -40,7 +40,7 @@ static int mtdblock_writesect(struct mtd_blktrans_dev *dev, { size_t retlen; - if (dev->mtd->write(dev->mtd, (block * 512), 512, &retlen, buf)) + if (mtd_write(dev->mtd, (block * 512), 512, &retlen, buf)) return 1; return 0; } diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index c7f484687fa..922da31d2c6 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -331,7 +331,7 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c } default: - ret = (*(mtd->write))(mtd, *ppos, len, &retlen, kbuf); + ret = mtd_write(mtd, *ppos, len, &retlen, kbuf); } if (!ret) { *ppos += retlen; diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index 45460349fd1..45215501c4c 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -148,7 +148,7 @@ concat_write(struct mtd_info *mtd, loff_t to, size_t len, if (!(subdev->flags & MTD_WRITEABLE)) err = -EROFS; else - err = subdev->write(subdev, to, size, &retsize, buf); + err = mtd_write(subdev, to, size, &retsize, buf); if (err) break; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index b01993ea260..e36191ab47c 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -699,7 +699,8 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, for (i=0; i<count; i++) { if (!vecs[i].iov_len) continue; - ret = mtd->write(mtd, to, vecs[i].iov_len, &thislen, vecs[i].iov_base); + ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen, + vecs[i].iov_base); totlen += thislen; if (ret || thislen != vecs[i].iov_len) break; diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index 23629ad0850..9c9d58617c9 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -225,8 +225,8 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic) ret = mtd->panic_write(mtd, cxt->nextpage * record_size, record_size, &retlen, cxt->oops_buf); else - ret = mtd->write(mtd, cxt->nextpage * record_size, - record_size, &retlen, cxt->oops_buf); + ret = mtd_write(mtd, cxt->nextpage * record_size, + record_size, &retlen, cxt->oops_buf); if (retlen != record_size || ret < 0) printk(KERN_ERR "mtdoops: write failure at %ld (%td of %ld written), error %d\n", diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 59cd7974bc5..96574a03656 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -188,8 +188,7 @@ static int part_write(struct mtd_info *mtd, loff_t to, size_t len, len = 0; else if (to + len > mtd->size) len = mtd->size - to; - return part->master->write(part->master, to + part->offset, - len, retlen, buf); + return mtd_write(part->master, to + part->offset, len, retlen, buf); } static int part_panic_write(struct mtd_info *mtd, loff_t to, size_t len, diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index b3282d2aa8f..6ff823e29c0 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -689,7 +689,7 @@ retry: return ret; writepos = (loff_t)*bp << PAGE_SHIFT; - ret =
mtd->write(mtd, writepos, PAGE_SIZE, &retlen, buf); + ret = mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf); if (ret == -EIO || mtd_is_eccerr(ret)) { d->curr_write_pos--; eb->active_count--; diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c index d9fe2d0533d..c594bb7abfa 100644 --- a/drivers/mtd/rfd_ftl.c +++ b/drivers/mtd/rfd_ftl.c @@ -304,9 +304,8 @@ static void erase_callback(struct erase_info *erase) part->blocks[i].used_sectors = 0; part->blocks[i].erases++; - rc = part->mbd.mtd->write(part->mbd.mtd, - part->blocks[i].offset, sizeof(magic), &retlen, - (u_char*)&magic); + rc = mtd_write(part->mbd.mtd, part->blocks[i].offset, sizeof(magic), + &retlen, (u_char *)&magic); if (!rc && retlen != sizeof(magic)) rc = -EIO; @@ -595,8 +594,8 @@ static int mark_sector_deleted(struct partition *part, u_long old_addr) addr = part->blocks[block].offset + (HEADER_MAP_OFFSET + offset) * sizeof(u16); - rc = part->mbd.mtd->write(part->mbd.mtd, addr, - sizeof(del), &retlen, (u_char*)&del); + rc = mtd_write(part->mbd.mtd, addr, sizeof(del), &retlen, + (u_char *)&del); if (!rc && retlen != sizeof(del)) rc = -EIO; @@ -668,8 +667,8 @@ static int do_writesect(struct mtd_blktrans_dev *dev, u_long sector, char *buf, addr = (i + part->header_sectors_per_block) * SECTOR_SIZE + block->offset; - rc = part->mbd.mtd->write(part->mbd.mtd, - addr, SECTOR_SIZE, &retlen, (u_char*)buf); + rc = mtd_write(part->mbd.mtd, addr, SECTOR_SIZE, &retlen, + (u_char *)buf); if (!rc && retlen != SECTOR_SIZE) rc = -EIO; @@ -688,8 +687,8 @@ static int do_writesect(struct mtd_blktrans_dev *dev, u_long sector, char *buf, part->header_cache[i + HEADER_MAP_OFFSET] = entry; addr = block->offset + (HEADER_MAP_OFFSET + i) * sizeof(u16); - rc = part->mbd.mtd->write(part->mbd.mtd, addr, - sizeof(entry), &retlen, (u_char*)&entry); + rc = mtd_write(part->mbd.mtd, addr, sizeof(entry), &retlen, + (u_char *)&entry); if (!rc && retlen != sizeof(entry)) rc = -EIO; diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c index 6d62e24a03e..83da97e54f9 100644 --- a/drivers/mtd/tests/mtd_pagetest.c +++ b/drivers/mtd/tests/mtd_pagetest.c @@ -100,7 +100,7 @@ static int write_eraseblock(int ebnum) set_random_data(writebuf, mtd->erasesize); cond_resched(); - err = mtd->write(mtd, addr, mtd->erasesize, &written, writebuf); + err = mtd_write(mtd, addr, mtd->erasesize, &written, writebuf); if (err || written != mtd->erasesize) printk(PRINT_PREF "error: write failed at %#llx\n", (long long)addr); @@ -335,7 +335,7 @@ static int erasecrosstest(void) printk(PRINT_PREF "writing 1st page of block %d\n", ebnum); set_random_data(writebuf, pgsize); strcpy(writebuf, "There is no data like this!"); - err = mtd->write(mtd, addr0, pgsize, &written, writebuf); + err = mtd_write(mtd, addr0, pgsize, &written, writebuf); if (err || written != pgsize) { printk(PRINT_PREF "error: write failed at %#llx\n", (long long)addr0); @@ -368,7 +368,7 @@ static int erasecrosstest(void) printk(PRINT_PREF "writing 1st page of block %d\n", ebnum); set_random_data(writebuf, pgsize); strcpy(writebuf, "There is no data like this!"); - err = mtd->write(mtd, addr0, pgsize, &written, writebuf); + err = mtd_write(mtd, addr0, pgsize, &written, writebuf); if (err || written != pgsize) { printk(PRINT_PREF "error: write failed at %#llx\n", (long long)addr0); @@ -425,7 +425,7 @@ static int erasetest(void) printk(PRINT_PREF "writing 1st page of block %d\n", ebnum); set_random_data(writebuf, pgsize); - err = mtd->write(mtd, addr0, pgsize, &written, writebuf); + err = 
mtd_write(mtd, addr0, pgsize, &written, writebuf); if (err || written != pgsize) { printk(PRINT_PREF "error: write failed at %#llx\n", (long long)addr0); diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c index 3c9529bd0a6..c7b18e18908 100644 --- a/drivers/mtd/tests/mtd_speedtest.c +++ b/drivers/mtd/tests/mtd_speedtest.c @@ -143,7 +143,7 @@ static int write_eraseblock(int ebnum) int err = 0; loff_t addr = ebnum * mtd->erasesize; - err = mtd->write(mtd, addr, mtd->erasesize, &written, iobuf); + err = mtd_write(mtd, addr, mtd->erasesize, &written, iobuf); if (err || written != mtd->erasesize) { printk(PRINT_PREF "error: write failed at %#llx\n", addr); if (!err) @@ -161,7 +161,7 @@ static int write_eraseblock_by_page(int ebnum) void *buf = iobuf; for (i = 0; i < pgcnt; i++) { - err = mtd->write(mtd, addr, pgsize, &written, buf); + err = mtd_write(mtd, addr, pgsize, &written, buf); if (err || written != pgsize) { printk(PRINT_PREF "error: write failed at %#llx\n", addr); @@ -184,7 +184,7 @@ static int write_eraseblock_by_2pages(int ebnum) void *buf = iobuf; for (i = 0; i < n; i++) { - err = mtd->write(mtd, addr, sz, &written, buf); + err = mtd_write(mtd, addr, sz, &written, buf); if (err || written != sz) { printk(PRINT_PREF "error: write failed at %#llx\n", addr); @@ -196,7 +196,7 @@ static int write_eraseblock_by_2pages(int ebnum) buf += sz; } if (pgcnt % 2) { - err = mtd->write(mtd, addr, pgsize, &written, buf); + err = mtd_write(mtd, addr, pgsize, &written, buf); if (err || written != pgsize) { printk(PRINT_PREF "error: write failed at %#llx\n", addr); diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c index 83a84372388..f8aac4b7e59 100644 --- a/drivers/mtd/tests/mtd_stresstest.c +++ b/drivers/mtd/tests/mtd_stresstest.c @@ -192,7 +192,7 @@ static int do_write(void) } } addr = eb * mtd->erasesize + offs; - err = mtd->write(mtd, addr, len, &written, writebuf); + err = mtd_write(mtd, addr, len, &written, writebuf); if (unlikely(err || written != len)) { printk(PRINT_PREF "error: write failed at 0x%llx\n", (long long)addr); diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c index d81f89a19da..b90c01036b4 100644 --- a/drivers/mtd/tests/mtd_subpagetest.c +++ b/drivers/mtd/tests/mtd_subpagetest.c @@ -120,7 +120,7 @@ static int write_eraseblock(int ebnum) loff_t addr = ebnum * mtd->erasesize; set_random_data(writebuf, subpgsize); - err = mtd->write(mtd, addr, subpgsize, &written, writebuf); + err = mtd_write(mtd, addr, subpgsize, &written, writebuf); if (unlikely(err || written != subpgsize)) { printk(PRINT_PREF "error: write failed at %#llx\n", (long long)addr); @@ -134,7 +134,7 @@ static int write_eraseblock(int ebnum) addr += subpgsize; set_random_data(writebuf, subpgsize); - err = mtd->write(mtd, addr, subpgsize, &written, writebuf); + err = mtd_write(mtd, addr, subpgsize, &written, writebuf); if (unlikely(err || written != subpgsize)) { printk(PRINT_PREF "error: write failed at %#llx\n", (long long)addr); @@ -158,7 +158,7 @@ static int write_eraseblock2(int ebnum) if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize) break; set_random_data(writebuf, subpgsize * k); - err = mtd->write(mtd, addr, subpgsize * k, &written, writebuf); + err = mtd_write(mtd, addr, subpgsize * k, &written, writebuf); if (unlikely(err || written != subpgsize * k)) { printk(PRINT_PREF "error: write failed at %#llx\n", (long long)addr); diff --git a/drivers/mtd/tests/mtd_torturetest.c 
b/drivers/mtd/tests/mtd_torturetest.c index ecc68bf3f3f..dd34a519fa7 100644 --- a/drivers/mtd/tests/mtd_torturetest.c +++ b/drivers/mtd/tests/mtd_torturetest.c @@ -189,7 +189,7 @@ static inline int write_pattern(int ebnum, void *buf) addr = (ebnum + 1) * mtd->erasesize - pgcnt * pgsize; len = pgcnt * pgsize; } - err = mtd->write(mtd, addr, len, &written, buf); + err = mtd_write(mtd, addr, len, &written, buf); if (err) { printk(PRINT_PREF "error %d while writing EB %d, written %zd" " bytes\n", err, ebnum, written); diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 433382951d3..8d832fc9e9e 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -289,7 +289,7 @@ int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, } addr = (loff_t)pnum * ubi->peb_size + offset; - err = ubi->mtd->write(ubi->mtd, addr, len, &written, buf); + err = mtd_write(ubi->mtd, addr, len, &written, buf); if (err) { ubi_err("error %d while writing %d bytes to PEB %d:%d, written " "%zd bytes", err, len, pnum, offset, written); @@ -525,11 +525,10 @@ static int nor_erase_prepare(struct ubi_device *ubi, int pnum) * the header comment in scan.c for more information). */ addr = (loff_t)pnum * ubi->peb_size; - err = ubi->mtd->write(ubi->mtd, addr, 4, &written, (void *)&data); + err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data); if (!err) { addr += ubi->vid_hdr_aloffset; - err = ubi->mtd->write(ubi->mtd, addr, 4, &written, - (void *)&data); + err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data); if (!err) return 0; } diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c index eccd08d0e00..2eb03213196 100644 --- a/drivers/staging/spectra/lld_mtd.c +++ b/drivers/staging/spectra/lld_mtd.c @@ -233,9 +233,11 @@ u16 mtd_Write_Page_Main(u8 *write_data, u32 Block, while (PageCount) { - ret = spectra_mtd->write(spectra_mtd, - (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize), - DeviceInfo.wPageDataSize, &retlen, write_data); + ret = mtd_write(spectra_mtd, + (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize), + DeviceInfo.wPageDataSize, + &retlen, + write_data); if (ret) { printk(KERN_ERR "%s failed %d\n", __func__, ret); return FAIL; diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index a24d3d21b63..3ea2f8db935 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -414,13 +414,12 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) if (breakme++ == 20) { printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs); breakme = 0; - c->mtd->write(c->mtd, ofs, towrite, &retlen, - brokenbuf); + mtd_write(c->mtd, ofs, towrite, &retlen, brokenbuf); ret = -EIO; } else #endif - ret = c->mtd->write(c->mtd, ofs, towrite, &retlen, - rewrite_buf); + ret = mtd_write(c->mtd, ofs, towrite, &retlen, + rewrite_buf); if (ret || retlen != towrite || jffs2_verify_write(c, rewrite_buf, ofs)) { /* Argh. We tried. Really we did. 
*/ @@ -620,13 +619,14 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) if (breakme++ == 20) { printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs); breakme = 0; - c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, - brokenbuf); + mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, + brokenbuf); ret = -EIO; } else #endif - ret = c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf); + ret = mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, + &retlen, c->wbuf); if (ret) { printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n", ret); @@ -862,8 +862,8 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, v += wbuf_retlen; if (vlen >= c->wbuf_pagesize) { - ret = c->mtd->write(c->mtd, outvec_to, PAGE_DIV(vlen), - &wbuf_retlen, v); + ret = mtd_write(c->mtd, outvec_to, PAGE_DIV(vlen), + &wbuf_retlen, v); if (ret < 0 || wbuf_retlen != PAGE_DIV(vlen)) goto outfile; diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c index b9276b11bac..b05710fd552 100644 --- a/fs/jffs2/writev.c +++ b/fs/jffs2/writev.c @@ -26,7 +26,8 @@ static inline int mtd_fake_writev(struct mtd_info *mtd, const struct kvec *vecs, for (i=0; i<count; i++) { if (!vecs[i].iov_len) continue; - ret = mtd->write(mtd, to, vecs[i].iov_len, &thislen, vecs[i].iov_base); + ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen, + vecs[i].iov_base); totlen += thislen; if (ret || thislen != vecs[i].iov_len) break; @@ -61,7 +62,7 @@ int jffs2_flash_direct_write(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *retlen, const u_char *buf) { int ret; - ret = c->mtd->write(c->mtd, ofs, len, retlen, buf); + ret = mtd_write(c->mtd, ofs, len, retlen, buf); if (jffs2_sum_active()) { struct kvec vecs[1]; diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index 3ee64351685..1842440d656 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -49,7 +49,7 @@ static int logfs_mtd_write(struct super_block *sb, loff_t ofs, size_t len, BUG_ON(len > PAGE_CACHE_SIZE); page_start = ofs & PAGE_CACHE_MASK; page_end = PAGE_CACHE_ALIGN(ofs + len) - 1; - ret = mtd->write(mtd, ofs, len, &retlen, buf); + ret = mtd_write(mtd, ofs, len, &retlen, buf); if (ret || (retlen != len)) return -EIO; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 56478eb4bbc..1da7f4a6ef8 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -184,6 +184,8 @@ struct mtd_info { unsigned long flags); int (*read) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); + int (*write) (struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf); /* Backing device capabilities for this device * - provides mmap capabilities @@ -191,7 +193,6 @@ struct mtd_info { struct backing_dev_info *backing_dev_info; - int (*write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf); /* In blackbox flight recorder like scenarios we want to make successful writes in interrupt context. panic_write() is only intended to be @@ -308,6 +309,12 @@ static inline int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { return mtd->read(mtd, from, len, retlen, buf); } +static inline int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf) +{ + return mtd->write(mtd, to, len, retlen, buf); +} + static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ?
dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From fd2819bbc92fc98bed5d612e4acbe16b6326f6bf Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 18:27:05 +0200 Subject: mtd: introduce mtd_read_oob interface Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/inftlcore.c | 2 +- drivers/mtd/mtdchar.c | 4 ++-- drivers/mtd/mtdconcat.c | 2 +- drivers/mtd/mtdpart.c | 2 +- drivers/mtd/mtdswap.c | 4 ++-- drivers/mtd/nand/nand_bbt.c | 6 +++--- drivers/mtd/nftlcore.c | 2 +- drivers/mtd/sm_ftl.c | 2 +- drivers/mtd/ssfdc.c | 2 +- drivers/mtd/tests/mtd_oobtest.c | 14 +++++++------- drivers/mtd/tests/mtd_readtest.c | 2 +- drivers/staging/spectra/lld_mtd.c | 12 ++++++------ fs/jffs2/wbuf.c | 4 ++-- include/linux/mtd/mtd.h | 10 ++++++++-- 14 files changed, 37 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c index 0b038bed7b9..07646e1273e 100644 --- a/drivers/mtd/inftlcore.c +++ b/drivers/mtd/inftlcore.c @@ -158,7 +158,7 @@ int inftl_read_oob(struct mtd_info *mtd, loff_t offs, size_t len, ops.oobbuf = buf; ops.datbuf = NULL; - res = mtd->read_oob(mtd, offs & ~(mtd->writesize - 1), &ops); + res = mtd_read_oob(mtd, offs & ~(mtd->writesize - 1), &ops); *retlen = ops.oobretlen; return res; } diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 922da31d2c6..e74f570a7b9 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -227,7 +227,7 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count, ops.oobbuf = NULL; ops.len = len; - ret = mtd->read_oob(mtd, *ppos, &ops); + ret = mtd_read_oob(mtd, *ppos, &ops); retlen = ops.retlen; break; } @@ -471,7 +471,7 @@ static int mtdchar_readoob(struct file *file, struct mtd_info *mtd, return -ENOMEM; start &= ~((uint64_t)mtd->writesize - 1); - ret = mtd->read_oob(mtd, start, &ops); + ret = mtd_read_oob(mtd, start, &ops); if (put_user(ops.oobretlen, retp)) ret = -EFAULT; diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index 45215501c4c..cf35642e5f4 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -273,7 +273,7 @@ concat_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) if (from + devops.len > subdev->size) devops.len = subdev->size - from; - err = subdev->read_oob(subdev, from, &devops); + err = mtd_read_oob(subdev, from, &devops); ops->retlen += devops.retlen; ops->oobretlen += devops.oobretlen; diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 9ed58f7d746..6fdc74ef19c 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -138,7 +138,7 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from, return -EINVAL; } - res = part->master->read_oob(part->master, from + part->offset, ops); + res = mtd_read_oob(part->master, from + part->offset, ops); if (unlikely(res)) { if (mtd_is_bitflip(res)) mtd->ecc_stats.corrected++; diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 6ff823e29c0..0f0ab18d440 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -312,7 +312,7 @@ static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb) static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from, struct mtd_oob_ops *ops) { - int ret = d->mtd->read_oob(d->mtd, from, ops); + int ret = mtd_read_oob(d->mtd, from, ops); if (mtd_is_bitflip(ret)) return ret; @@ -955,7 +955,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d, pos = base; for (i = 0; i < mtd_pages; i++) { - ret = mtd->read_oob(mtd, pos, 
&ops); + ret = mtd_read_oob(mtd, pos, &ops); if (ret) goto error; diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c index 1bcd6bc6798..fcab50e80b9 100644 --- a/drivers/mtd/nand/nand_bbt.c +++ b/drivers/mtd/nand/nand_bbt.c @@ -317,7 +317,7 @@ static int scan_read_raw_oob(struct mtd_info *mtd, uint8_t *buf, loff_t offs, ops.len = min(len, (size_t)mtd->writesize); ops.oobbuf = buf + ops.len; - res = mtd->read_oob(mtd, offs, &ops); + res = mtd_read_oob(mtd, offs, &ops); if (res) return res; @@ -434,7 +434,7 @@ static int scan_block_fast(struct mtd_info *mtd, struct nand_bbt_descr *bd, * Read the full oob until read_oob is fixed to handle single * byte reads for 16 bit buswidth. */ - ret = mtd->read_oob(mtd, offs, &ops); + ret = mtd_read_oob(mtd, offs, &ops); /* Ignore ECC errors when checking for BBM */ if (ret && !mtd_is_bitflip_or_eccerr(ret)) return ret; @@ -769,7 +769,7 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf, /* Read oob data */ ops.ooblen = (len >> this->page_shift) * mtd->oobsize; ops.oobbuf = &buf[len]; - res = mtd->read_oob(mtd, to + mtd->writesize, &ops); + res = mtd_read_oob(mtd, to + mtd->writesize, &ops); if (res < 0 || ops.oobretlen != ops.ooblen) goto outerr; diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c index 1a9d9c1d3a7..7497f5efc26 100644 --- a/drivers/mtd/nftlcore.c +++ b/drivers/mtd/nftlcore.c @@ -153,7 +153,7 @@ int nftl_read_oob(struct mtd_info *mtd, loff_t offs, size_t len, ops.oobbuf = buf; ops.datbuf = NULL; - res = mtd->read_oob(mtd, offs & ~mask, &ops); + res = mtd_read_oob(mtd, offs & ~mask, &ops); *retlen = ops.oobretlen; return res; } diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c index 2f1acb1ab5e..748aa441669 100644 --- a/drivers/mtd/sm_ftl.c +++ b/drivers/mtd/sm_ftl.c @@ -278,7 +278,7 @@ again: /* Unfortunately, oob read will _always_ succeed, despite card removal..... 
*/ - ret = mtd->read_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops); + ret = mtd_read_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops); /* Test for unknown errors */ if (ret != 0 && !mtd_is_bitflip_or_eccerr(ret)) { diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c index 293e22a5710..0e688133835 100644 --- a/drivers/mtd/ssfdc.c +++ b/drivers/mtd/ssfdc.c @@ -175,7 +175,7 @@ static int read_raw_oob(struct mtd_info *mtd, loff_t offs, uint8_t *buf) ops.oobbuf = buf; ops.datbuf = NULL; - ret = mtd->read_oob(mtd, offs, &ops); + ret = mtd_read_oob(mtd, offs, &ops); if (ret < 0 || ops.oobretlen != OOB_SIZE) return -1; diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c index 7d52854c16d..962d27a64e6 100644 --- a/drivers/mtd/tests/mtd_oobtest.c +++ b/drivers/mtd/tests/mtd_oobtest.c @@ -192,7 +192,7 @@ static int verify_eraseblock(int ebnum) ops.ooboffs = use_offset; ops.datbuf = NULL; ops.oobbuf = readbuf; - err = mtd->read_oob(mtd, addr, &ops); + err = mtd_read_oob(mtd, addr, &ops); if (err || ops.oobretlen != use_len) { printk(PRINT_PREF "error: readoob failed at %#llx\n", (long long)addr); @@ -219,7 +219,7 @@ static int verify_eraseblock(int ebnum) ops.ooboffs = 0; ops.datbuf = NULL; ops.oobbuf = readbuf; - err = mtd->read_oob(mtd, addr, &ops); + err = mtd_read_oob(mtd, addr, &ops); if (err || ops.oobretlen != mtd->ecclayout->oobavail) { printk(PRINT_PREF "error: readoob failed at " "%#llx\n", (long long)addr); @@ -284,7 +284,7 @@ static int verify_eraseblock_in_one_go(int ebnum) ops.ooboffs = 0; ops.datbuf = NULL; ops.oobbuf = readbuf; - err = mtd->read_oob(mtd, addr, &ops); + err = mtd_read_oob(mtd, addr, &ops); if (err || ops.oobretlen != len) { printk(PRINT_PREF "error: readoob failed at %#llx\n", (long long)addr); @@ -544,7 +544,7 @@ static int __init mtd_oobtest_init(void) ops.oobbuf = readbuf; printk(PRINT_PREF "attempting to start read past end of OOB\n"); printk(PRINT_PREF "an error is expected...\n"); - err = mtd->read_oob(mtd, addr0, &ops); + err = mtd_read_oob(mtd, addr0, &ops); if (err) { printk(PRINT_PREF "error occurred as expected\n"); err = 0; @@ -588,7 +588,7 @@ static int __init mtd_oobtest_init(void) ops.oobbuf = readbuf; printk(PRINT_PREF "attempting to read past end of device\n"); printk(PRINT_PREF "an error is expected...\n"); - err = mtd->read_oob(mtd, mtd->size - mtd->writesize, &ops); + err = mtd_read_oob(mtd, mtd->size - mtd->writesize, &ops); if (err) { printk(PRINT_PREF "error occurred as expected\n"); err = 0; @@ -632,7 +632,7 @@ static int __init mtd_oobtest_init(void) ops.oobbuf = readbuf; printk(PRINT_PREF "attempting to read past end of device\n"); printk(PRINT_PREF "an error is expected...\n"); - err = mtd->read_oob(mtd, mtd->size - mtd->writesize, &ops); + err = mtd_read_oob(mtd, mtd->size - mtd->writesize, &ops); if (err) { printk(PRINT_PREF "error occurred as expected\n"); err = 0; @@ -698,7 +698,7 @@ static int __init mtd_oobtest_init(void) ops.ooboffs = 0; ops.datbuf = NULL; ops.oobbuf = readbuf; - err = mtd->read_oob(mtd, addr, &ops); + err = mtd_read_oob(mtd, addr, &ops); if (err) goto out; if (memcmp(readbuf, writebuf, mtd->ecclayout->oobavail * 2)) { diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c index 0c58d2976c7..5eaeada8428 100644 --- a/drivers/mtd/tests/mtd_readtest.c +++ b/drivers/mtd/tests/mtd_readtest.c @@ -74,7 +74,7 @@ static int read_eraseblock_by_page(int ebnum) ops.ooboffs = 0; ops.datbuf = NULL; ops.oobbuf = oobbuf; - ret = mtd->read_oob(mtd, addr, &ops); + ret 
= mtd_read_oob(mtd, addr, &ops); if ((ret && !mtd_is_bitflip(ret)) || ops.oobretlen != mtd->oobsize) { printk(PRINT_PREF "error: read oob failed at " diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c index 2eb03213196..ed8e5f06708 100644 --- a/drivers/staging/spectra/lld_mtd.c +++ b/drivers/staging/spectra/lld_mtd.c @@ -351,9 +351,9 @@ u16 mtd_Read_Page_Main_Spare(u8 *read_data, u32 Block, ops.ooblen = BTSIG_BYTES; ops.ooboffs = 0; - ret = spectra_mtd->read_oob(spectra_mtd, - (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize), - &ops); + ret = mtd_read_oob(spectra_mtd, + (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize), + &ops); if (ret) { printk(KERN_ERR "%s failed %d\n", __func__, ret); return FAIL; @@ -484,9 +484,9 @@ u16 mtd_Read_Page_Spare(u8 *read_data, u32 Block, ops.ooblen = BTSIG_BYTES; ops.ooboffs = 0; - ret = spectra_mtd->read_oob(spectra_mtd, - (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize), - &ops); + ret = mtd_read_oob(spectra_mtd, + (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize), + &ops); if (ret) { printk(KERN_ERR "%s failed %d\n", __func__, ret); return FAIL; diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 3ea2f8db935..efc0cb37030 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1032,7 +1032,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c, ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; ops.datbuf = NULL; - ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops); + ret = mtd_read_oob(c->mtd, jeb->offset, &ops); if (ret || ops.oobretlen != ops.ooblen) { printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" " bytes, read %zd bytes, error %d\n", @@ -1075,7 +1075,7 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c, ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; ops.datbuf = NULL; - ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops); + ret = mtd_read_oob(c->mtd, jeb->offset, &ops); if (ret || ops.oobretlen != ops.ooblen) { printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" " bytes, read %zd bytes, error %d\n", diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 2fb83cd3d26..0db8d87ce45 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -188,6 +188,8 @@ struct mtd_info { size_t *retlen, const u_char *buf); int (*panic_write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf); + int (*read_oob) (struct mtd_info *mtd, loff_t from, + struct mtd_oob_ops *ops); /* Backing device capabilities for this device * - provides mmap capabilities @@ -195,8 +197,6 @@ struct mtd_info { struct backing_dev_info *backing_dev_info; - int (*read_oob) (struct mtd_info *mtd, loff_t from, - struct mtd_oob_ops *ops); int (*write_oob) (struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops); @@ -320,6 +320,12 @@ static inline int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len, return mtd->panic_write(mtd, to, len, retlen, buf); } +static inline int mtd_read_oob(struct mtd_info *mtd, loff_t from, + struct mtd_oob_ops *ops) +{ + return mtd->read_oob(mtd, from, ops); +} + static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ? 
dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From a2cc5ba075f9bc837d0b4d4ec7328dcefc11859d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 18:29:55 +0200 Subject: mtd: introduce mtd_write_oob interface Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/inftlcore.c | 4 ++-- drivers/mtd/mtdchar.c | 6 +++--- drivers/mtd/mtdconcat.c | 2 +- drivers/mtd/mtdpart.c | 2 +- drivers/mtd/mtdswap.c | 4 ++-- drivers/mtd/nand/nand_bbt.c | 2 +- drivers/mtd/nand/sm_common.c | 2 +- drivers/mtd/nftlcore.c | 4 ++-- drivers/mtd/sm_ftl.c | 2 +- drivers/mtd/tests/mtd_oobtest.c | 10 +++++----- drivers/staging/spectra/lld_mtd.c | 6 +++--- fs/jffs2/wbuf.c | 2 +- include/linux/mtd/mtd.h | 12 ++++++++---- 13 files changed, 31 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c index 07646e1273e..28646c95cfb 100644 --- a/drivers/mtd/inftlcore.c +++ b/drivers/mtd/inftlcore.c @@ -178,7 +178,7 @@ int inftl_write_oob(struct mtd_info *mtd, loff_t offs, size_t len, ops.oobbuf = buf; ops.datbuf = NULL; - res = mtd->write_oob(mtd, offs & ~(mtd->writesize - 1), &ops); + res = mtd_write_oob(mtd, offs & ~(mtd->writesize - 1), &ops); *retlen = ops.oobretlen; return res; } @@ -199,7 +199,7 @@ static int inftl_write(struct mtd_info *mtd, loff_t offs, size_t len, ops.datbuf = buf; ops.len = len; - res = mtd->write_oob(mtd, offs & ~(mtd->writesize - 1), &ops); + res = mtd_write_oob(mtd, offs & ~(mtd->writesize - 1), &ops); *retlen = ops.retlen; return res; } diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index e74f570a7b9..234e3d27143 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -325,7 +325,7 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c ops.ooboffs = 0; ops.len = len; - ret = mtd->write_oob(mtd, *ppos, &ops); + ret = mtd_write_oob(mtd, *ppos, &ops); retlen = ops.retlen; break; } @@ -426,7 +426,7 @@ static int mtdchar_writeoob(struct file *file, struct mtd_info *mtd, return PTR_ERR(ops.oobbuf); start &= ~((uint64_t)mtd->writesize - 1); - ret = mtd->write_oob(mtd, start, &ops); + ret = mtd_write_oob(mtd, start, &ops); if (ops.oobretlen > 0xFFFFFFFFU) ret = -EOVERFLOW; @@ -609,7 +609,7 @@ static int mtdchar_write_ioctl(struct mtd_info *mtd, ops.oobbuf = NULL; } - ret = mtd->write_oob(mtd, (loff_t)req.start, &ops); + ret = mtd_write_oob(mtd, (loff_t)req.start, &ops); kfree(ops.datbuf); kfree(ops.oobbuf); diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index cf35642e5f4..3d9c1ffdbbb 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -333,7 +333,7 @@ concat_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) if (to + devops.len > subdev->size) devops.len = subdev->size - to; - err = subdev->write_oob(subdev, to, &devops); + err = mtd_write_oob(subdev, to, &devops); ops->retlen += devops.oobretlen; if (err) return err; diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 6fdc74ef19c..8a46cd2bb78 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -217,7 +217,7 @@ static int part_write_oob(struct mtd_info *mtd, loff_t to, return -EINVAL; if (ops->datbuf && to + ops->len > mtd->size) return -EINVAL; - return part->master->write_oob(part->master, to + part->offset, ops); + return mtd_write_oob(part->master, to + part->offset, ops); } static int part_write_user_prot_reg(struct mtd_info *mtd, loff_t from, diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 
0f0ab18d440..85797390e3d 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -403,7 +403,7 @@ static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb, offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize; } - ret = d->mtd->write_oob(d->mtd, offset , &ops); + ret = mtd_write_oob(d->mtd, offset, &ops); if (ret) { dev_warn(d->dev, "Write OOB failed for block at %08llx " @@ -946,7 +946,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d, patt = mtdswap_test_patt(test + i); memset(d->page_buf, patt, mtd->writesize); memset(d->oob_buf, patt, mtd->ecclayout->oobavail); - ret = mtd->write_oob(mtd, pos, &ops); + ret = mtd_write_oob(mtd, pos, &ops); if (ret) goto error; diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c index fcab50e80b9..20a112f591f 100644 --- a/drivers/mtd/nand/nand_bbt.c +++ b/drivers/mtd/nand/nand_bbt.c @@ -350,7 +350,7 @@ static int scan_write_bbt(struct mtd_info *mtd, loff_t offs, size_t len, ops.oobbuf = oob; ops.len = len; - return mtd->write_oob(mtd, offs, &ops); + return mtd_write_oob(mtd, offs, &ops); } static u32 bbt_get_ver_offs(struct mtd_info *mtd, struct nand_bbt_descr *td) diff --git a/drivers/mtd/nand/sm_common.c b/drivers/mtd/nand/sm_common.c index 32ae5af7444..774c3c26671 100644 --- a/drivers/mtd/nand/sm_common.c +++ b/drivers/mtd/nand/sm_common.c @@ -55,7 +55,7 @@ static int sm_block_markbad(struct mtd_info *mtd, loff_t ofs) ops.datbuf = NULL; - ret = mtd->write_oob(mtd, ofs, &ops); + ret = mtd_write_oob(mtd, ofs, &ops); if (ret < 0 || ops.oobretlen != SM_OOB_SIZE) { printk(KERN_NOTICE "sm_common: can't mark sector at %i as bad\n", diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c index 7497f5efc26..8847e60ad16 100644 --- a/drivers/mtd/nftlcore.c +++ b/drivers/mtd/nftlcore.c @@ -174,7 +174,7 @@ int nftl_write_oob(struct mtd_info *mtd, loff_t offs, size_t len, ops.oobbuf = buf; ops.datbuf = NULL; - res = mtd->write_oob(mtd, offs & ~mask, &ops); + res = mtd_write_oob(mtd, offs & ~mask, &ops); *retlen = ops.oobretlen; return res; } @@ -198,7 +198,7 @@ static int nftl_write(struct mtd_info *mtd, loff_t offs, size_t len, ops.datbuf = buf; ops.len = len; - res = mtd->write_oob(mtd, offs & ~mask, &ops); + res = mtd_write_oob(mtd, offs & ~mask, &ops); *retlen = ops.retlen; return res; } diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c index 748aa441669..4ec2af7fb84 100644 --- a/drivers/mtd/sm_ftl.c +++ b/drivers/mtd/sm_ftl.c @@ -343,7 +343,7 @@ static int sm_write_sector(struct sm_ftl *ftl, ops.ooblen = SM_OOB_SIZE; ops.oobbuf = (void *)oob; - ret = mtd->write_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops); + ret = mtd_write_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops); /* Now we assume that hardware will catch write bitflip errors */ /* If you are paranoid, use CONFIG_MTD_NAND_VERIFY_WRITE */ diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c index 962d27a64e6..81113885e08 100644 --- a/drivers/mtd/tests/mtd_oobtest.c +++ b/drivers/mtd/tests/mtd_oobtest.c @@ -139,7 +139,7 @@ static int write_eraseblock(int ebnum) ops.ooboffs = use_offset; ops.datbuf = NULL; ops.oobbuf = writebuf; - err = mtd->write_oob(mtd, addr, &ops); + err = mtd_write_oob(mtd, addr, &ops); if (err || ops.oobretlen != use_len) { printk(PRINT_PREF "error: writeoob failed at %#llx\n", (long long)addr); @@ -524,7 +524,7 @@ static int __init mtd_oobtest_init(void) ops.oobbuf = writebuf; printk(PRINT_PREF "attempting to start write past end of OOB\n"); printk(PRINT_PREF "an 
error is expected...\n"); - err = mtd->write_oob(mtd, addr0, &ops); + err = mtd_write_oob(mtd, addr0, &ops); if (err) { printk(PRINT_PREF "error occurred as expected\n"); err = 0; @@ -568,7 +568,7 @@ static int __init mtd_oobtest_init(void) ops.oobbuf = writebuf; printk(PRINT_PREF "attempting to write past end of device\n"); printk(PRINT_PREF "an error is expected...\n"); - err = mtd->write_oob(mtd, mtd->size - mtd->writesize, &ops); + err = mtd_write_oob(mtd, mtd->size - mtd->writesize, &ops); if (err) { printk(PRINT_PREF "error occurred as expected\n"); err = 0; @@ -612,7 +612,7 @@ static int __init mtd_oobtest_init(void) ops.oobbuf = writebuf; printk(PRINT_PREF "attempting to write past end of device\n"); printk(PRINT_PREF "an error is expected...\n"); - err = mtd->write_oob(mtd, mtd->size - mtd->writesize, &ops); + err = mtd_write_oob(mtd, mtd->size - mtd->writesize, &ops); if (err) { printk(PRINT_PREF "error occurred as expected\n"); err = 0; @@ -670,7 +670,7 @@ static int __init mtd_oobtest_init(void) ops.ooboffs = 0; ops.datbuf = NULL; ops.oobbuf = writebuf; - err = mtd->write_oob(mtd, addr, &ops); + err = mtd_write_oob(mtd, addr, &ops); if (err) goto out; if (i % 256 == 0) diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c index ed8e5f06708..4aa48ddf979 100644 --- a/drivers/staging/spectra/lld_mtd.c +++ b/drivers/staging/spectra/lld_mtd.c @@ -411,9 +411,9 @@ u16 mtd_Write_Page_Main_Spare(u8 *write_data, u32 Block, ops.ooblen = BTSIG_BYTES; ops.ooboffs = 0; - ret = spectra_mtd->write_oob(spectra_mtd, - (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize), - &ops); + ret = mtd_write_oob(spectra_mtd, + (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize), + &ops); if (ret) { printk(KERN_ERR "%s failed %d\n", __func__, ret); return FAIL; diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index efc0cb37030..eae5be48368 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1101,7 +1101,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c, ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; ops.datbuf = NULL; - ret = c->mtd->write_oob(c->mtd, jeb->offset, &ops); + ret = mtd_write_oob(c->mtd, jeb->offset, &ops); if (ret || ops.oobretlen != ops.ooblen) { printk(KERN_ERR "cannot write OOB for EB at %08x, requested %zd" " bytes, read %zd bytes, error %d\n", diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 0db8d87ce45..abbc96ad3b2 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -190,16 +190,14 @@ struct mtd_info { size_t *retlen, const u_char *buf); int (*read_oob) (struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops); + int (*write_oob) (struct mtd_info *mtd, loff_t to, + struct mtd_oob_ops *ops); /* Backing device capabilities for this device * - provides mmap capabilities */ struct backing_dev_info *backing_dev_info; - - int (*write_oob) (struct mtd_info *mtd, loff_t to, - struct mtd_oob_ops *ops); - /* * Methods to access the protection register area, present in some * flash devices. The user data is one time programmable but the @@ -326,6 +324,12 @@ static inline int mtd_read_oob(struct mtd_info *mtd, loff_t from, return mtd->read_oob(mtd, from, ops); } +static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to, + struct mtd_oob_ops *ops) +{ + return mtd->write_oob(mtd, to, ops); +} + static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ? 
dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From b0a31f7b2a668f00a8d0546dfeed65fac871b2da Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 18:59:12 +0200 Subject: mtd: introduce mtd_writev interface Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdconcat.c | 5 +++-- drivers/mtd/mtdpart.c | 4 ++-- fs/jffs2/writev.c | 2 +- include/linux/mtd/mtd.h | 18 ++++++++++++------ 4 files changed, 18 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index 3d9c1ffdbbb..6fdae191e1b 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -227,8 +227,9 @@ concat_writev(struct mtd_info *mtd, const struct kvec *vecs, if (!(subdev->flags & MTD_WRITEABLE)) err = -EROFS; else - err = subdev->writev(subdev, &vecs_copy[entry_low], - entry_high - entry_low + 1, to, &retsize); + err = mtd_writev(subdev, &vecs_copy[entry_low], + entry_high - entry_low + 1, to, + &retsize); vecs_copy[entry_high].iov_len = old_iov_len - size; vecs_copy[entry_high].iov_base += size; diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 0bb16d6ed08..c0bfa88c82f 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -238,8 +238,8 @@ static int part_writev(struct mtd_info *mtd, const struct kvec *vecs, struct mtd_part *part = PART(mtd); if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; - return part->master->writev(part->master, vecs, count, - to + part->offset, retlen); + return mtd_writev(part->master, vecs, count, to + part->offset, + retlen); } static int part_erase(struct mtd_info *mtd, struct erase_info *instr) diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c index b05710fd552..d0ef068709a 100644 --- a/fs/jffs2/writev.c +++ b/fs/jffs2/writev.c @@ -52,7 +52,7 @@ int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs, } if (c->mtd->writev) - return c->mtd->writev(c->mtd, vecs, count, to, retlen); + return mtd_writev(c->mtd, vecs, count, to, retlen); else { return mtd_fake_writev(c->mtd, vecs, count, to, retlen); } diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index b58e5e8746e..4129cb5c3de 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -204,18 +204,14 @@ struct mtd_info { size_t *retlen, u_char *buf); int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len); + int (*writev) (struct mtd_info *mtd, const struct kvec *vecs, + unsigned long count, loff_t to, size_t *retlen); /* Backing device capabilities for this device * - provides mmap capabilities */ struct backing_dev_info *backing_dev_info; - /* kvec-based read/write methods. - NB: The 'count' parameter is the number of _vectors_, each of - which contains an (ofs, len) tuple. - */ - int (*writev) (struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen); - /* Sync */ void (*sync) (struct mtd_info *mtd); @@ -375,6 +371,16 @@ static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, return mtd->lock_user_prot_reg(mtd, from, len); } +/* + * kvec-based read/write method. NB: The 'count' parameter is the number of + * _vectors_, each of which contains an (ofs, len) tuple. + */ +static inline int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, + unsigned long count, loff_t to, size_t *retlen) +{ + return mtd->writev(mtd, vecs, count, to, retlen); +} + static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ? 
dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From 85f2f2a809d658c15b574df02ede92090f45a1f2 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 19:03:12 +0200 Subject: mtd: introduce mtd_sync interface Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/ftl.c | 2 +- drivers/mtd/mtdblock.c | 4 ++-- drivers/mtd/mtdchar.c | 2 +- drivers/mtd/mtdconcat.c | 2 +- drivers/mtd/mtdpart.c | 2 +- drivers/mtd/mtdswap.c | 2 +- drivers/mtd/rfd_ftl.c | 2 +- drivers/mtd/ubi/kapi.c | 2 +- fs/jffs2/super.c | 2 +- fs/logfs/dev_mtd.c | 2 +- include/linux/mtd/mtd.h | 9 ++++++--- 11 files changed, 17 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index d591b1d0a6c..c9c90299c9e 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -651,7 +651,7 @@ static int reclaim_block(partition_t *part) pr_debug("ftl_cs: waiting for transfer " "unit to be prepared...\n"); if (part->mbd.mtd->sync) - part->mbd.mtd->sync(part->mbd.mtd); + mtd_sync(part->mbd.mtd); } else { static int ne = 0; if (++ne < 5) diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c index ac7f1f1faa2..496e1a6e802 100644 --- a/drivers/mtd/mtdblock.c +++ b/drivers/mtd/mtdblock.c @@ -323,7 +323,7 @@ static int mtdblock_release(struct mtd_blktrans_dev *mbd) if (!--mtdblk->count) { /* It was the last usage. Free the cache */ if (mbd->mtd->sync) - mbd->mtd->sync(mbd->mtd); + mtd_sync(mbd->mtd); vfree(mtdblk->cache_data); } @@ -343,7 +343,7 @@ static int mtdblock_flush(struct mtd_blktrans_dev *dev) mutex_unlock(&mtdblk->cache_mutex); if (dev->mtd->sync) - dev->mtd->sync(dev->mtd); + mtd_sync(dev->mtd); return 0; } diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 86308acb40e..b5722ecf19d 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -155,7 +155,7 @@ static int mtdchar_close(struct inode *inode, struct file *file) /* Only sync if opened RW */ if ((file->f_mode & FMODE_WRITE) && mtd->sync) - mtd->sync(mtd); + mtd_sync(mtd); iput(mfi->ino); diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index 6fdae191e1b..cc2336edfe2 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -620,7 +620,7 @@ static void concat_sync(struct mtd_info *mtd) for (i = 0; i < concat->num_subdev; i++) { struct mtd_info *subdev = concat->subdev[i]; - subdev->sync(subdev); + mtd_sync(subdev); } } diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index c0bfa88c82f..2b545052795 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -301,7 +301,7 @@ static int part_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) static void part_sync(struct mtd_info *mtd) { struct mtd_part *part = PART(mtd); - part->master->sync(part->master); + mtd_sync(part->master); } static int part_suspend(struct mtd_info *mtd) diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 85797390e3d..cb794e76101 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -1048,7 +1048,7 @@ static int mtdswap_flush(struct mtd_blktrans_dev *dev) struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); if (d->mtd->sync) - d->mtd->sync(d->mtd); + mtd_sync(d->mtd); return 0; } diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c index c594bb7abfa..5426d42cdea 100644 --- a/drivers/mtd/rfd_ftl.c +++ b/drivers/mtd/rfd_ftl.c @@ -449,7 +449,7 @@ static int reclaim_block(struct partition *part, u_long *old_sector) /* we have a race if sync doesn't exist */ if (part->mbd.mtd->sync) - part->mbd.mtd->sync(part->mbd.mtd); + 
part->mbd.mtd->sync(part->mbd.mtd); +
mtd_sync(part->mbd.mtd); score = 0x7fffffff; /* MAX_INT */ best_block = -1; diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 1a35fc5e3b4..9f265cc1a0d 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -715,7 +715,7 @@ int ubi_sync(int ubi_num) return -ENODEV; if (ubi->mtd->sync) - ubi->mtd->sync(ubi->mtd); + mtd_sync(ubi->mtd); ubi_put_device(ubi); return 0; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index e7e97445411..e78bf3cd1b7 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -337,7 +337,7 @@ static void jffs2_put_super (struct super_block *sb) kfree(c->inocache_list); jffs2_clear_xattr_subsystem(c); if (c->mtd->sync) - c->mtd->sync(c->mtd); + mtd_sync(c->mtd); D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); } diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index 1842440d656..0ca7a07db6c 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -120,7 +120,7 @@ static void logfs_mtd_sync(struct super_block *sb) struct mtd_info *mtd = logfs_super(sb)->s_mtd; if (mtd->sync) - mtd->sync(mtd); + mtd_sync(mtd); } static int logfs_mtd_readpage(void *_sb, struct page *page) diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 4129cb5c3de..47ea19c1e52 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -206,15 +206,13 @@ struct mtd_info { size_t len); int (*writev) (struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen); + void (*sync) (struct mtd_info *mtd); /* Backing device capabilities for this device * - provides mmap capabilities */ struct backing_dev_info *backing_dev_info; - /* Sync */ - void (*sync) (struct mtd_info *mtd); - /* Chip-supported device locking */ int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); @@ -381,6 +379,11 @@ static inline int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, return mtd->writev(mtd, vecs, count, to, retlen); } +static inline void mtd_sync(struct mtd_info *mtd) +{ + mtd->sync(mtd); +} + static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ? 
dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From 7086c19d07429d697057587caf1e5e0345442d16 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 19:35:30 +0200 Subject: mtd: introduce mtd_block_isbad interface Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- arch/cris/arch-v32/drivers/axisflashmap.c | 3 +-- drivers/mtd/inftlmount.c | 3 ++- drivers/mtd/mtdchar.c | 2 +- drivers/mtd/mtdconcat.c | 2 +- drivers/mtd/mtdoops.c | 4 ++-- drivers/mtd/mtdpart.c | 5 ++--- drivers/mtd/mtdswap.c | 4 ++-- drivers/mtd/nftlmount.c | 3 ++- drivers/mtd/redboot.c | 4 ++-- drivers/mtd/ssfdc.c | 4 ++-- drivers/mtd/tests/mtd_oobtest.c | 2 +- drivers/mtd/tests/mtd_pagetest.c | 2 +- drivers/mtd/tests/mtd_readtest.c | 2 +- drivers/mtd/tests/mtd_speedtest.c | 2 +- drivers/mtd/tests/mtd_stresstest.c | 2 +- drivers/mtd/tests/mtd_subpagetest.c | 2 +- drivers/mtd/tests/mtd_torturetest.c | 3 +-- drivers/mtd/ubi/io.c | 2 +- fs/jffs2/scan.c | 2 +- fs/logfs/dev_mtd.c | 4 ++-- include/linux/mtd/mtd.h | 7 ++++++- 21 files changed, 34 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c index 011bddbf073..b34438e026b 100644 --- a/arch/cris/arch-v32/drivers/axisflashmap.c +++ b/arch/cris/arch-v32/drivers/axisflashmap.c @@ -404,8 +404,7 @@ static int __init init_axis_flash(void) */ int blockstat; do { - blockstat = main_mtd->block_isbad(main_mtd, - ptable_sector); + blockstat = mtd_block_isbad(main_mtd, ptable_sector); if (blockstat < 0) ptable_sector = 0; /* read error */ else if (blockstat) diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c index 9bfbca5d88d..38519401196 100644 --- a/drivers/mtd/inftlmount.c +++ b/drivers/mtd/inftlmount.c @@ -306,7 +306,8 @@ static int find_boot_record(struct INFTLrecord *inftl) /* If any of the physical eraseblocks are bad, don't use the unit. 
*/ for (physblock = 0; physblock < inftl->EraseSize; physblock += inftl->mbd.mtd->erasesize) { - if (inftl->mbd.mtd->block_isbad(inftl->mbd.mtd, i * inftl->EraseSize + physblock)) + if (mtd_block_isbad(inftl->mbd.mtd, + i * inftl->EraseSize + physblock)) inftl->PUtable[i] = BLOCK_RESERVED; } } diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 6d598b23cf3..a499bf7a821 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -886,7 +886,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg) if (!mtd->block_isbad) ret = -EOPNOTSUPP; else - return mtd->block_isbad(mtd, offs); + return mtd_block_isbad(mtd, offs); break; } diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index 4b7f825ce01..d0db5e61d5a 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -667,7 +667,7 @@ static int concat_block_isbad(struct mtd_info *mtd, loff_t ofs) continue; } - res = subdev->block_isbad(subdev, ofs); + res = mtd_block_isbad(subdev, ofs); break; } diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index 7be2018ffbc..bc43d2f7272 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -170,7 +170,7 @@ static void mtdoops_workfunc_erase(struct work_struct *work) } while (mtd->block_isbad) { - ret = mtd->block_isbad(mtd, cxt->nextpage * record_size); + ret = mtd_block_isbad(mtd, cxt->nextpage * record_size); if (!ret) break; if (ret < 0) { @@ -254,7 +254,7 @@ static void find_next_position(struct mtdoops_context *cxt) for (page = 0; page < cxt->oops_pages; page++) { if (mtd->block_isbad && - mtd->block_isbad(mtd, page * record_size)) + mtd_block_isbad(mtd, page * record_size)) continue; /* Assume the page is used */ mark_page_used(cxt, page); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 8610750852a..0e7dfc79d33 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -322,7 +322,7 @@ static int part_block_isbad(struct mtd_info *mtd, loff_t ofs) if (ofs >= mtd->size) return -EINVAL; ofs += part->offset; - return part->master->block_isbad(part->master, ofs); + return mtd_block_isbad(part->master, ofs); } static int part_block_markbad(struct mtd_info *mtd, loff_t ofs) @@ -553,8 +553,7 @@ static struct mtd_part *allocate_partition(struct mtd_info *master, uint64_t offs = 0; while (offs < slave->mtd.size) { - if (master->block_isbad(master, - offs + slave->offset)) + if (mtd_block_isbad(master, offs + slave->offset)) slave->mtd.ecc_stats.badblocks++; offs += slave->mtd.erasesize; } diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index cb794e76101..87aa0a6323c 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -343,7 +343,7 @@ static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb) offset = mtdswap_eb_offset(d, eb); /* Check first if the block is bad. 
*/ - if (d->mtd->block_isbad && d->mtd->block_isbad(d->mtd, offset)) + if (d->mtd->block_isbad && mtd_block_isbad(d->mtd, offset)) return MTDSWAP_SCANNED_BAD; ops.ooblen = 2 * d->mtd->ecclayout->oobavail; @@ -1061,7 +1061,7 @@ static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size) if (mtd->block_isbad) for (offset = 0; offset < size; offset += mtd->erasesize) - if (mtd->block_isbad(mtd, offset)) + if (mtd_block_isbad(mtd, offset)) badcnt++; return badcnt; diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c index b068dc8a366..156af9f8796 100644 --- a/drivers/mtd/nftlmount.c +++ b/drivers/mtd/nftlmount.c @@ -242,7 +242,8 @@ The new DiskOnChip driver already scanned the bad block table. Just query it. if (buf[i & (SECTORSIZE - 1)] != 0xff) nftl->ReplUnitTable[i] = BLOCK_RESERVED; #endif - if (nftl->mbd.mtd->block_isbad(nftl->mbd.mtd, i * nftl->EraseSize)) + if (mtd_block_isbad(nftl->mbd.mtd, + i * nftl->EraseSize)) nftl->ReplUnitTable[i] = BLOCK_RESERVED; } diff --git a/drivers/mtd/redboot.c b/drivers/mtd/redboot.c index 623d9b86d0d..09bb81ea3a7 100644 --- a/drivers/mtd/redboot.c +++ b/drivers/mtd/redboot.c @@ -79,7 +79,7 @@ static int parse_redboot_partitions(struct mtd_info *master, if ( directory < 0 ) { offset = master->size + directory * master->erasesize; while (master->block_isbad && - master->block_isbad(master, offset)) { + mtd_block_isbad(master, offset)) { if (!offset) { nogood: printk(KERN_NOTICE "Failed to find a non-bad block to check for RedBoot partition table\n"); @@ -90,7 +90,7 @@ static int parse_redboot_partitions(struct mtd_info *master, } else { offset = directory * master->erasesize; while (master->block_isbad && - master->block_isbad(master, offset)) { + mtd_block_isbad(master, offset)) { offset += master->erasesize; if (offset == master->size) goto nogood; diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c index 0e688133835..ab2a52a039c 100644 --- a/drivers/mtd/ssfdc.c +++ b/drivers/mtd/ssfdc.c @@ -122,7 +122,7 @@ static int get_valid_cis_sector(struct mtd_info *mtd) * is not SSFDC formatted */ for (k = 0, offset = 0; k < 4; k++, offset += mtd->erasesize) { - if (!mtd->block_isbad(mtd, offset)) { + if (mtd_block_isbad(mtd, offset)) { ret = mtd_read(mtd, offset, SECTOR_SIZE, &retlen, sect_buf); @@ -255,7 +255,7 @@ static int build_logical_block_map(struct ssfdcr_record *ssfdc) for (phys_block = ssfdc->cis_block + 1; phys_block < ssfdc->map_len; phys_block++) { offset = (unsigned long)phys_block * ssfdc->erase_size; - if (mtd->block_isbad(mtd, offset)) + if (mtd_block_isbad(mtd, offset)) continue; /* skip bad blocks */ ret = read_raw_oob(mtd, offset, oob_buf); diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c index 81113885e08..ed9b62827f1 100644 --- a/drivers/mtd/tests/mtd_oobtest.c +++ b/drivers/mtd/tests/mtd_oobtest.c @@ -329,7 +329,7 @@ static int is_block_bad(int ebnum) int ret; loff_t addr = ebnum * mtd->erasesize; - ret = mtd->block_isbad(mtd, addr); + ret = mtd_block_isbad(mtd, addr); if (ret) printk(PRINT_PREF "block %d is bad\n", ebnum); return ret; diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c index 83da97e54f9..8024eaf4c1a 100644 --- a/drivers/mtd/tests/mtd_pagetest.c +++ b/drivers/mtd/tests/mtd_pagetest.c @@ -469,7 +469,7 @@ static int is_block_bad(int ebnum) loff_t addr = ebnum * mtd->erasesize; int ret; - ret = mtd->block_isbad(mtd, addr); + ret = mtd_block_isbad(mtd, addr); if (ret) printk(PRINT_PREF "block %d is bad\n", ebnum); return ret; diff --git 
a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c index 5eaeada8428..ad5fd0df86e 100644 --- a/drivers/mtd/tests/mtd_readtest.c +++ b/drivers/mtd/tests/mtd_readtest.c @@ -132,7 +132,7 @@ static int is_block_bad(int ebnum) loff_t addr = ebnum * mtd->erasesize; int ret; - ret = mtd->block_isbad(mtd, addr); + ret = mtd_block_isbad(mtd, addr); if (ret) printk(PRINT_PREF "block %d is bad\n", ebnum); return ret; diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c index c7b18e18908..ecb28784750 100644 --- a/drivers/mtd/tests/mtd_speedtest.c +++ b/drivers/mtd/tests/mtd_speedtest.c @@ -296,7 +296,7 @@ static int is_block_bad(int ebnum) loff_t addr = ebnum * mtd->erasesize; int ret; - ret = mtd->block_isbad(mtd, addr); + ret = mtd_block_isbad(mtd, addr); if (ret) printk(PRINT_PREF "block %d is bad\n", ebnum); return ret; diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c index f8aac4b7e59..4789c0ee3e9 100644 --- a/drivers/mtd/tests/mtd_stresstest.c +++ b/drivers/mtd/tests/mtd_stresstest.c @@ -132,7 +132,7 @@ static int is_block_bad(int ebnum) loff_t addr = ebnum * mtd->erasesize; int ret; - ret = mtd->block_isbad(mtd, addr); + ret = mtd_block_isbad(mtd, addr); if (ret) printk(PRINT_PREF "block %d is bad\n", ebnum); return ret; diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c index b90c01036b4..4b873d49fe6 100644 --- a/drivers/mtd/tests/mtd_subpagetest.c +++ b/drivers/mtd/tests/mtd_subpagetest.c @@ -344,7 +344,7 @@ static int is_block_bad(int ebnum) loff_t addr = ebnum * mtd->erasesize; int ret; - ret = mtd->block_isbad(mtd, addr); + ret = mtd_block_isbad(mtd, addr); if (ret) printk(PRINT_PREF "block %d is bad\n", ebnum); return ret; diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c index dd34a519fa7..30c4ed9855e 100644 --- a/drivers/mtd/tests/mtd_torturetest.c +++ b/drivers/mtd/tests/mtd_torturetest.c @@ -292,8 +292,7 @@ static int __init tort_init(void) memset(&bad_ebs[0], 0, sizeof(int) * ebcnt); if (mtd->block_isbad) { for (i = eb; i < eb + ebcnt; i++) { - err = mtd->block_isbad(mtd, - (loff_t)i * mtd->erasesize); + err = mtd_block_isbad(mtd, (loff_t)i * mtd->erasesize); if (err < 0) { printk(PRINT_PREF "block_isbad() returned %d " diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 8d832fc9e9e..a1b683ad639 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -634,7 +634,7 @@ int ubi_io_is_bad(const struct ubi_device *ubi, int pnum) if (ubi->bad_allowed) { int ret; - ret = mtd->block_isbad(mtd, (loff_t)pnum * ubi->peb_size); + ret = mtd_block_isbad(mtd, (loff_t)pnum * ubi->peb_size); if (ret < 0) ubi_err("error %d while checking if PEB %d is bad", ret, pnum); diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 72f3960f44a..83e1665e257 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -455,7 +455,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo if (jffs2_cleanmarker_oob(c)) { int ret; - if (c->mtd->block_isbad(c->mtd, jeb->offset)) + if (mtd_block_isbad(c->mtd, jeb->offset)) return BLK_STATE_BADBLOCK; ret = jffs2_check_nand_cleanmarker(c, jeb); diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index 0ca7a07db6c..136c7360a9b 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -157,7 +157,7 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs) return NULL; *ofs = 0; - while (mtd->block_isbad(mtd, *ofs)) { + while (mtd_block_isbad(mtd, *ofs)) { 
*ofs += mtd->erasesize; if (*ofs >= mtd->size) return NULL; @@ -177,7 +177,7 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs) return NULL; *ofs = mtd->size - mtd->erasesize; - while (mtd->block_isbad(mtd, *ofs)) { + while (mtd_block_isbad(mtd, *ofs)) { *ofs -= mtd->erasesize; if (*ofs <= 0) return NULL; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index d6b4aa17750..a307ad093a5 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -210,6 +210,7 @@ struct mtd_info { int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len); + int (*block_isbad) (struct mtd_info *mtd, loff_t ofs); int (*suspend) (struct mtd_info *mtd); void (*resume) (struct mtd_info *mtd); @@ -219,7 +220,6 @@ struct mtd_info { struct backing_dev_info *backing_dev_info; /* Bad block management functions */ - int (*block_isbad) (struct mtd_info *mtd, loff_t ofs); int (*block_markbad) (struct mtd_info *mtd, loff_t ofs); struct notifier_block reboot_notifier; /* default mode before reboot */ @@ -406,6 +406,11 @@ static inline void mtd_resume(struct mtd_info *mtd) mtd->resume(mtd); } +static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) +{ + return mtd->block_isbad(mtd, ofs); +} + static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ? dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From 5942ddbc500d1c9b75e571b656be97f65b26adfe Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Dec 2011 19:37:38 +0200 Subject: mtd: introduce mtd_block_markbad interface Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/inftlmount.c | 2 +- drivers/mtd/mtdchar.c | 2 +- drivers/mtd/mtdconcat.c | 2 +- drivers/mtd/mtdoops.c | 2 +- drivers/mtd/mtdpart.c | 2 +- drivers/mtd/mtdswap.c | 2 +- drivers/mtd/nand/nandsim.c | 2 +- drivers/mtd/nftlmount.c | 2 +- drivers/mtd/onenand/onenand_base.c | 2 +- drivers/mtd/ubi/io.c | 2 +- fs/jffs2/wbuf.c | 2 +- include/linux/mtd/mtd.h | 9 ++++++--- 12 files changed, 17 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c index 38519401196..4adc0374fb6 100644 --- a/drivers/mtd/inftlmount.c +++ b/drivers/mtd/inftlmount.c @@ -424,7 +424,7 @@ int INFTL_formatblock(struct INFTLrecord *inftl, int block) fail: /* could not format, update the bad block table (caller is responsible for setting the PUtable to BLOCK_RESERVED on failure) */ - inftl->mbd.mtd->block_markbad(inftl->mbd.mtd, instr->addr); + mtd_block_markbad(inftl->mbd.mtd, instr->addr); return -1; } diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index a499bf7a821..15a3f6224be 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -899,7 +899,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg) if (!mtd->block_markbad) ret = -EOPNOTSUPP; else - return mtd->block_markbad(mtd, offs); + return mtd_block_markbad(mtd, offs); break; } diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index d0db5e61d5a..f694b51e785 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -693,7 +693,7 @@ static int concat_block_markbad(struct mtd_info *mtd, loff_t ofs) continue; } - err = subdev->block_markbad(subdev, ofs); + err = mtd_block_markbad(subdev, ofs); if (!err) mtd->ecc_stats.badblocks++; break; diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index bc43d2f7272..69532a34e56 
100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -200,7 +200,7 @@ badblock: } if (mtd->block_markbad && ret == -EIO) { - ret = mtd->block_markbad(mtd, cxt->nextpage * record_size); + ret = mtd_block_markbad(mtd, cxt->nextpage * record_size); if (ret < 0) { printk(KERN_ERR "mtdoops: block_markbad failed, aborting\n"); return; diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 0e7dfc79d33..a3d44c3416b 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -335,7 +335,7 @@ static int part_block_markbad(struct mtd_info *mtd, loff_t ofs) if (ofs >= mtd->size) return -EINVAL; ofs += part->offset; - res = part->master->block_markbad(part->master, ofs); + res = mtd_block_markbad(part->master, ofs); if (!res) mtd->ecc_stats.badblocks++; return res; diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 87aa0a6323c..4441c08b082 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -279,7 +279,7 @@ static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb) offset = mtdswap_eb_offset(d, eb); dev_warn(d->dev, "Marking bad block at %08llx\n", offset); - ret = d->mtd->block_markbad(d->mtd, offset); + ret = mtd_block_markbad(d->mtd, offset); if (ret) { dev_warn(d->dev, "Mark block bad failed for block at %08llx " diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 34c03be7730..261f478f8cc 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -737,7 +737,7 @@ static int parse_badblocks(struct nandsim *ns, struct mtd_info *mtd) return -EINVAL; } offset = erase_block_no * ns->geom.secsz; - if (mtd->block_markbad(mtd, offset)) { + if (mtd_block_markbad(mtd, offset)) { NS_ERR("invalid badblocks.\n"); return -EINVAL; } diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c index 156af9f8796..51b9d6af307 100644 --- a/drivers/mtd/nftlmount.c +++ b/drivers/mtd/nftlmount.c @@ -356,7 +356,7 @@ int NFTL_formatblock(struct NFTLrecord *nftl, int block) fail: /* could not format, update the bad block table (caller is responsible for setting the ReplUnitTable to BLOCK_RESERVED on failure) */ - nftl->mbd.mtd->block_markbad(nftl->mbd.mtd, instr->addr); + mtd_block_markbad(nftl->mbd.mtd, instr->addr); return -1; } diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index a8394730b4b..dd278a28413 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -2645,7 +2645,7 @@ static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs) } onenand_get_device(mtd, FL_WRITING); - ret = this->block_markbad(mtd, ofs); + ret = mtd_block_markbad(mtd, ofs); onenand_release_device(mtd); return ret; } diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index a1b683ad639..5cde4e5ca3e 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -669,7 +669,7 @@ int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum) if (!ubi->bad_allowed) return 0; - err = mtd->block_markbad(mtd, (loff_t)pnum * ubi->peb_size); + err = mtd_block_markbad(mtd, (loff_t)pnum * ubi->peb_size); if (err) ubi_err("cannot mark PEB %d bad, error %d", pnum, err); return err; diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index eae5be48368..fd96b757433 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1134,7 +1134,7 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock * return 1; // What else can we do? 
printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset); - ret = c->mtd->block_markbad(c->mtd, bad_offset); + ret = mtd_block_markbad(c->mtd, bad_offset); if (ret) { D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Write failed for block at %08x: error %d\n", jeb->offset, ret)); diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index a307ad093a5..64aa54fba2d 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -211,6 +211,7 @@ struct mtd_info { int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len); int (*block_isbad) (struct mtd_info *mtd, loff_t ofs); + int (*block_markbad) (struct mtd_info *mtd, loff_t ofs); int (*suspend) (struct mtd_info *mtd); void (*resume) (struct mtd_info *mtd); @@ -219,9 +220,6 @@ struct mtd_info { */ struct backing_dev_info *backing_dev_info; - /* Bad block management functions */ - int (*block_markbad) (struct mtd_info *mtd, loff_t ofs); - struct notifier_block reboot_notifier; /* default mode before reboot */ /* ECC status information */ @@ -411,6 +409,11 @@ static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) return mtd->block_isbad(mtd, ofs); } +static inline int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs) +{ + return mtd->block_markbad(mtd, ofs); +} + static inline struct mtd_info *dev_to_mtd(struct device *dev) { return dev ? dev_get_drvdata(dev) : NULL; -- cgit v1.2.3 From 4ccf2f1349e681401b5fae73efc87b8d2d70ce0e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 30 Dec 2011 15:48:55 +0200 Subject: jffs: remove custom mtd_fake_writev function Instead, use 'default_mtd_writev()' function which MTD provides. Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- fs/jffs2/writev.c | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c index d0ef068709a..8d704073f8b 100644 --- a/fs/jffs2/writev.c +++ b/fs/jffs2/writev.c @@ -13,31 +13,6 @@ #include #include "nodelist.h" -/* This ought to be in core MTD code. All registered MTD devices - without writev should have this put in place. Bug the MTD - maintainer */ -static inline int mtd_fake_writev(struct mtd_info *mtd, const struct kvec *vecs, - unsigned long count, loff_t to, size_t *retlen) -{ - unsigned long i; - size_t totlen = 0, thislen; - int ret = 0; - - for (i=0; imtd->writev) return mtd_writev(c->mtd, vecs, count, to, retlen); else { - return mtd_fake_writev(c->mtd, vecs, count, to, retlen); + return default_mtd_writev(c->mtd, vecs, count, to, retlen); } } -- cgit v1.2.3 From 10934478e44d9a5a7b16dadd89094fb608cf101e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 28 Dec 2011 15:55:42 +0200 Subject: mtd: do use mtd->point directly Remove direct usage of the "mtd->point" function pointer. Instead, test the mtd_point() return code for '-EOPNOTSUPP'. 
Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- fs/jffs2/erase.c | 9 ++++----- fs/jffs2/readinode.c | 18 ++++++++---------- fs/jffs2/scan.c | 2 +- include/linux/mtd/mtd.h | 2 ++ 4 files changed, 15 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index c59d642cade..a01cdad6aad 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -336,12 +336,11 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl uint32_t ofs; size_t retlen; int ret = -EIO; + unsigned long *wordebuf; - if (c->mtd->point) { - unsigned long *wordebuf; - - ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen, - &ebuf, NULL); + ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen, + &ebuf, NULL); + if (ret != -EOPNOTSUPP) { if (ret) { D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); goto do_flash_read; diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index fca2f84e1ad..3093ac4fb24 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -62,17 +62,15 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info #ifndef __ECOS /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(), * adding and jffs2_flash_read_end() interface. */ - if (c->mtd->point) { - err = mtd_point(c->mtd, ofs, len, &retlen, (void **)&buffer, - NULL); - if (!err && retlen < len) { - JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); - mtd_unpoint(c->mtd, ofs, retlen); - } else if (err) + err = mtd_point(c->mtd, ofs, len, &retlen, (void **)&buffer, NULL); + if (!err && retlen < len) { + JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); + mtd_unpoint(c->mtd, ofs, retlen); + } else if (err) { + if (err != -EOPNOTSUPP) JFFS2_WARNING("MTD point failed: error code %d.\n", err); - else - pointed = 1; /* succefully pointed to device */ - } + } else + pointed = 1; /* succefully pointed to device */ #endif if (!pointed) { diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 83e1665e257..f99464833bb 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -105,7 +105,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) mtd_unpoint(c->mtd, 0, pointlen); flashbuf = NULL; } - if (ret) + if (ret && ret != -EOPNOTSUPP) D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); } #endif diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 478701566ba..b355a83e7cc 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -259,6 +259,8 @@ static inline int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, void **virt, resource_size_t *phys) { *retlen = 0; + if (!mtd->point) + return -EOPNOTSUPP; return mtd->point(mtd, from, len, retlen, virt, phys); } -- cgit v1.2.3 From 4991e7251ed951a5f33faf25912e9db416306309 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 28 Dec 2011 17:08:03 +0200 Subject: romfs: do not use mtd->get_unmapped_area directly Remove direct usage of mtd->get_unmapped_area. Instead, just call 'mtd_get_unmapped_area()' which will return -EOPNOTSUPP if the function is not implemented, and then test for this code. We also translate -EOPNOTSUPP to -ENOSYS because this return code is probably part of the kernel ABI which we do not want to break. 
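The wrapper being relied on here is not visible in these hunks; judging from the description above it is presumably a thin helper of roughly this shape (a reconstruction, with the signature inferred from the call site below, not a quote from mtd.h):

static inline unsigned long mtd_get_unmapped_area(struct mtd_info *mtd,
                                                  unsigned long len,
                                                  unsigned long offset,
                                                  unsigned long flags)
{
        /* No ->get_unmapped_area method: report "not supported" and let
         * callers such as romfs decide how to translate that for userspace. */
        if (!mtd->get_unmapped_area)
                return -EOPNOTSUPP;
        return mtd->get_unmapped_area(mtd, len, offset, flags);
}
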
Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- fs/romfs/mmap-nommu.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c index d5168e8e7dc..e1a7779dd3c 100644 --- a/fs/romfs/mmap-nommu.c +++ b/fs/romfs/mmap-nommu.c @@ -28,9 +28,10 @@ static unsigned long romfs_get_unmapped_area(struct file *file, struct inode *inode = file->f_mapping->host; struct mtd_info *mtd = inode->i_sb->s_mtd; unsigned long isize, offset, maxpages, lpages; + int ret; if (!mtd) - goto cant_map_directly; + return (unsigned long) -ENOSYS; /* the mapping mustn't extend beyond the EOF */ lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -41,23 +42,20 @@ static unsigned long romfs_get_unmapped_area(struct file *file, if ((pgoff >= maxpages) || (maxpages - pgoff < lpages)) return (unsigned long) -EINVAL; - /* we need to call down to the MTD layer to do the actual mapping */ - if (mtd->get_unmapped_area) { - if (addr != 0) - return (unsigned long) -EINVAL; - - if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT)) - return (unsigned long) -EINVAL; + if (addr != 0) + return (unsigned long) -EINVAL; - offset += ROMFS_I(inode)->i_dataoffset; - if (offset > mtd->size - len) - return (unsigned long) -EINVAL; + if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT)) + return (unsigned long) -EINVAL; - return mtd_get_unmapped_area(mtd, len, offset, flags); - } + offset += ROMFS_I(inode)->i_dataoffset; + if (offset > mtd->size - len) + return (unsigned long) -EINVAL; -cant_map_directly: - return (unsigned long) -ENOSYS; + ret = mtd_get_unmapped_area(mtd, len, offset, flags); + if (ret == -EOPNOTSUPP) + ret = -ENOSYS; + return (unsigned long) ret; } /* -- cgit v1.2.3 From 1dbebd32562b3c2caeca35960e5cb00bfcc12900 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 30 Dec 2011 16:23:41 +0200 Subject: mtd: harmonize mtd_writev usage This patch makes the 'mtd_writev()' function more usable and logical. We first teach it to fall-back to the 'default_mtd_writev()' function if the MTD driver does not define its own '->writev()' method. Then we make block2mtd and JFFS2 just 'mtd_writev()' instead of 'default_mtd_writev()' function. This means we can now stop exporting 'default_mtd_writev()' and instead, export 'mtd_writev()'. This is much cleaner and more logical, as well as allows us to get read of another direct 'mtd->writev' access in JFFS2. 
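As a usage illustration (function and variable names here are mine, not from the patch): after this change a caller can submit a vectored write without first checking whether the driver implements ->writev, since mtd_writev() now performs the fallback itself.

#include <linux/mtd/mtd.h>
#include <linux/uio.h>

/* Sketch: write a header and a payload in one vectored call. */
static int write_hdr_and_data(struct mtd_info *mtd, loff_t to,
                              void *hdr, size_t hdr_len,
                              void *data, size_t data_len)
{
        struct kvec vecs[2];
        size_t retlen;
        int ret;

        vecs[0].iov_base = hdr;
        vecs[0].iov_len  = hdr_len;
        vecs[1].iov_base = data;
        vecs[1].iov_len  = data_len;

        /* Uses the driver's ->writev if present, otherwise the default
         * kvec loop over mtd->write, as implemented in mtdcore.c below. */
        ret = mtd_writev(mtd, vecs, 2, to, &retlen);
        if (ret)
                return ret;
        return retlen == hdr_len + data_len ? 0 : -EIO;
}
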
Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/devices/block2mtd.c | 2 +- drivers/mtd/mtdcore.c | 26 +++++++++++++++++++++++--- fs/jffs2/writev.c | 6 +----- include/linux/mtd/mtd.h | 16 ++-------------- 4 files changed, 27 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index b78f23169d4..c16f6b4e893 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -288,7 +288,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) dev->mtd.flags = MTD_CAP_RAM; dev->mtd.erase = block2mtd_erase; dev->mtd.write = block2mtd_write; - dev->mtd.writev = default_mtd_writev; + dev->mtd.writev = mtd_writev; dev->mtd.sync = block2mtd_sync; dev->mtd.read = block2mtd_read; dev->mtd.priv = dev; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 53a200f722b..4d0f3e557bd 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -696,8 +696,8 @@ EXPORT_SYMBOL_GPL(__put_mtd_device); * This function returns zero in case of success and a negative error code in * case of failure. */ -int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, - unsigned long count, loff_t to, size_t *retlen) +static int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, + unsigned long count, loff_t to, size_t *retlen) { unsigned long i; size_t totlen = 0, thislen; @@ -716,7 +716,27 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, *retlen = totlen; return ret; } -EXPORT_SYMBOL_GPL(default_mtd_writev); + +/* + * mtd_writev - the vector-based MTD write method + * @mtd: mtd device description object pointer + * @vecs: the vectors to write + * @count: count of vectors in @vecs + * @to: the MTD device offset to write to + * @retlen: on exit contains the count of bytes written to the MTD device. + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, + unsigned long count, loff_t to, size_t *retlen) +{ + *retlen = 0; + if (!mtd->writev) + return default_mtd_writev(mtd, vecs, count, to, retlen); + return mtd->writev(mtd, vecs, count, to, retlen); +} +EXPORT_SYMBOL_GPL(mtd_writev); /** * mtd_kmalloc_up_to - allocate a contiguous buffer up to the specified size diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c index 8d704073f8b..a1bda9dab3f 100644 --- a/fs/jffs2/writev.c +++ b/fs/jffs2/writev.c @@ -26,11 +26,7 @@ int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs, } } - if (c->mtd->writev) - return mtd_writev(c->mtd, vecs, count, to, retlen); - else { - return default_mtd_writev(c->mtd, vecs, count, to, retlen); - } + return mtd_writev(c->mtd, vecs, count, to, retlen); } int jffs2_flash_direct_write(struct jffs2_sb_info *c, loff_t ofs, size_t len, diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index a994129ede5..a58ecf4d1f8 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -394,16 +394,8 @@ static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, return mtd->lock_user_prot_reg(mtd, from, len); } -/* - * kvec-based read/write method. NB: The 'count' parameter is the number of - * _vectors_, each of which contains an (ofs, len) tuple. 
- */ -static inline int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, - unsigned long count, loff_t to, size_t *retlen) -{ - *retlen = 0; - return mtd->writev(mtd, vecs, count, to, retlen); -} +int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, + unsigned long count, loff_t to, size_t *retlen); static inline void mtd_sync(struct mtd_info *mtd) { @@ -510,10 +502,6 @@ struct mtd_notifier { extern void register_mtd_user (struct mtd_notifier *new); extern int unregister_mtd_user (struct mtd_notifier *old); - -int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, - unsigned long count, loff_t to, size_t *retlen); - void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size); void mtd_erase_callback(struct erase_info *instr); -- cgit v1.2.3 From 327cf2922b4edf0439b219469722d2a502e37349 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 30 Dec 2011 16:35:35 +0200 Subject: mtd: do not use mtd->sync directly This patch teaches 'mtd_sync()' to do nothing when the MTD driver does not have the '->sync()' method, which allows us to remove all direct 'mtd->sync' accesses. Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/ftl.c | 3 +-- drivers/mtd/mtdblock.c | 7 ++----- drivers/mtd/mtdchar.c | 2 +- drivers/mtd/mtdswap.c | 3 +-- drivers/mtd/rfd_ftl.c | 3 +-- drivers/mtd/ubi/kapi.c | 4 +--- fs/jffs2/super.c | 4 +--- fs/logfs/dev_mtd.c | 3 +-- include/linux/mtd/mtd.h | 3 ++- 9 files changed, 11 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index c9c90299c9e..19d637266fc 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -650,8 +650,7 @@ static int reclaim_block(partition_t *part) if (queued) { pr_debug("ftl_cs: waiting for transfer " "unit to be prepared...\n"); - if (part->mbd.mtd->sync) - mtd_sync(part->mbd.mtd); + mtd_sync(part->mbd.mtd); } else { static int ne = 0; if (++ne < 5) diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c index 496e1a6e802..af6591237b9 100644 --- a/drivers/mtd/mtdblock.c +++ b/drivers/mtd/mtdblock.c @@ -322,8 +322,7 @@ static int mtdblock_release(struct mtd_blktrans_dev *mbd) if (!--mtdblk->count) { /* It was the last usage. 
Free the cache */ - if (mbd->mtd->sync) - mtd_sync(mbd->mtd); + mtd_sync(mbd->mtd); vfree(mtdblk->cache_data); } @@ -341,9 +340,7 @@ static int mtdblock_flush(struct mtd_blktrans_dev *dev) mutex_lock(&mtdblk->cache_mutex); write_cached_data(mtdblk); mutex_unlock(&mtdblk->cache_mutex); - - if (dev->mtd->sync) - mtd_sync(dev->mtd); + mtd_sync(dev->mtd); return 0; } diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 2020a169ed9..23a51104aeb 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -154,7 +154,7 @@ static int mtdchar_close(struct inode *inode, struct file *file) pr_debug("MTD_close\n"); /* Only sync if opened RW */ - if ((file->f_mode & FMODE_WRITE) && mtd->sync) + if ((file->f_mode & FMODE_WRITE)) mtd_sync(mtd); iput(mfi->ino); diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 4441c08b082..fe4426c1c73 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -1047,8 +1047,7 @@ static int mtdswap_flush(struct mtd_blktrans_dev *dev) { struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); - if (d->mtd->sync) - mtd_sync(d->mtd); + mtd_sync(d->mtd); return 0; } diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c index 5426d42cdea..233b946e5d6 100644 --- a/drivers/mtd/rfd_ftl.c +++ b/drivers/mtd/rfd_ftl.c @@ -448,8 +448,7 @@ static int reclaim_block(struct partition *part, u_long *old_sector) int rc; /* we have a race if sync doesn't exist */ - if (part->mbd.mtd->sync) - mtd_sync(part->mbd.mtd); + mtd_sync(part->mbd.mtd); score = 0x7fffffff; /* MAX_INT */ best_block = -1; diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 9f265cc1a0d..9fdb35367fe 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -714,9 +714,7 @@ int ubi_sync(int ubi_num) if (!ubi) return -ENODEV; - if (ubi->mtd->sync) - mtd_sync(ubi->mtd); - + mtd_sync(ubi->mtd); ubi_put_device(ubi); return 0; } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index e78bf3cd1b7..5863a369d92 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -336,9 +336,7 @@ static void jffs2_put_super (struct super_block *sb) jffs2_flash_cleanup(c); kfree(c->inocache_list); jffs2_clear_xattr_subsystem(c); - if (c->mtd->sync) - mtd_sync(c->mtd); - + mtd_sync(c->mtd); D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); } diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index 136c7360a9b..3f465882ee7 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -119,8 +119,7 @@ static void logfs_mtd_sync(struct super_block *sb) { struct mtd_info *mtd = logfs_super(sb)->s_mtd; - if (mtd->sync) - mtd_sync(mtd); + mtd_sync(mtd); } static int logfs_mtd_readpage(void *_sb, struct page *page) diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index a58ecf4d1f8..305f12b940f 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -399,7 +399,8 @@ int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, static inline void mtd_sync(struct mtd_info *mtd) { - mtd->sync(mtd); + if (mtd->sync) + mtd->sync(mtd); } /* Chip-supported device locking */ -- cgit v1.2.3 From d58b27ed58a30faf376e40d19945f34301944b8d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 2 Jan 2012 13:52:14 +0200 Subject: logfs: do not use 'mtd->block_isbad' directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead, use the new 'mtd_can_have_bb()' helper. 
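The helper itself is not part of this patch; judging from the way it replaces the '!mtd->block_isbad' tests in the hunks below, it is presumably a one-line wrapper of roughly this shape (a reconstruction, not a quote from mtd.h):

static inline int mtd_can_have_bb(const struct mtd_info *mtd)
{
        /* A device can have bad blocks only if the driver can report them. */
        return !!mtd->block_isbad;
}
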
Cc: Jörn Engel Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- fs/logfs/dev_mtd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index 3f465882ee7..e97404d611e 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -152,7 +152,7 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd->block_isbad) + if (!mtd_can_have_bb(mtd)) return NULL; *ofs = 0; @@ -172,7 +172,7 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd->block_isbad) + if (!mtd_can_have_bb(mtd)) return NULL; *ofs = mtd->size - mtd->erasesize; -- cgit v1.2.3 From 800ffd3496987e91f599a135060ef49731e045ac Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 2 Jan 2012 13:59:12 +0200 Subject: mtd: do not use mtd->block_markbad directly Instead, use the new 'mtd_can_have_bb()', or just rely on 'mtd_block_markbad()' return code, which will be -EOPNOTSUPP if bad blocks are not supported. Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdchar.c | 5 +---- drivers/mtd/mtdconcat.c | 2 +- drivers/mtd/mtdoops.c | 2 +- drivers/mtd/mtdswap.c | 2 +- fs/jffs2/wbuf.c | 3 --- include/linux/mtd/mtd.h | 2 ++ 6 files changed, 6 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 64efcbf087e..50c6a1e7f67 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -877,10 +877,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg) if (copy_from_user(&offs, argp, sizeof(loff_t))) return -EFAULT; - if (!mtd->block_markbad) - ret = -EOPNOTSUPP; - else - return mtd_block_markbad(mtd, offs); + return mtd_block_markbad(mtd, offs); break; } diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index fbf3cb124a9..1ed5103b219 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -673,7 +673,7 @@ static int concat_block_markbad(struct mtd_info *mtd, loff_t ofs) struct mtd_concat *concat = CONCAT(mtd); int i, err = -EINVAL; - if (!concat->subdev[0]->block_markbad) + if (!mtd_can_have_bb(concat->subdev[0])) return 0; if (ofs > mtd->size) diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index a4c8f67560e..db8e8272d69 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -199,7 +199,7 @@ badblock: return; } - if (mtd->block_markbad && ret == -EIO) { + if (mtd_can_have_bb(mtd) && ret == -EIO) { ret = mtd_block_markbad(mtd, cxt->nextpage * record_size); if (ret < 0) { printk(KERN_ERR "mtdoops: block_markbad failed, aborting\n"); diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 3fc8cb2756c..c92f0f6bc13 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -274,7 +274,7 @@ static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb) eb->root = NULL; /* badblocks not supported */ - if (!d->mtd->block_markbad) + if (!mtd_can_have_bb(d->mtd)) return 1; offset = mtdswap_eb_offset(d, eb); diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index fd96b757433..30e8f47e8a2 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1130,9 +1130,6 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock * if( ++jeb->bad_count < MAX_ERASE_FAILURES) return 0; - if (!c->mtd->block_markbad) - return 1; // What else can we do? 
- printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset); ret = mtd_block_markbad(c->mtd, bad_offset); diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 7e35755f693..1a81fde8f33 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -447,6 +447,8 @@ static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) static inline int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs) { + if (!mtd->block_markbad) + return -EOPNOTSUPP; return mtd->block_markbad(mtd, ofs); } -- cgit v1.2.3 From 074b1d12fe2500d7d453902f9266e6674b30d84c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Jan 2012 13:46:26 -0500 Subject: NFSv4: Change the default setting of the nfs4_disable_idmapping parameter Now that the use of numeric uids/gids is officially sanctioned in RFC3530bis, it is time to change the default here to 'enabled'. By doing so, we ensure that NFSv4 copies the behaviour of NFSv3 when we're using the default AUTH_SYS authentication (i.e. when the client uses the numeric uids/gids as authentication tokens), so that when new files are created, they will appear to have the correct user/group. It also fixes a number of backward compatibility issues when migrating from NFSv3 to NFSv4 on a platform where the server uses different uid/gid mappings than the client. Note also that this setting has been successfully tested against servers that do not support numeric uids/gids at several Connectathon/Bakeathon events at this point, and the fall back to using string names/groups has been shown to work well in all those test cases. Signed-off-by: Trond Myklebust --- Documentation/kernel-parameters.txt | 17 +++++++++++------ fs/nfs/client.c | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 81c287fad79..dfd21cfdf57 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1630,12 +1630,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. The default is to return 64-bit inode numbers. nfs.nfs4_disable_idmapping= - [NFSv4] When set, this option disables the NFSv4 - idmapper on the client, but only if the mount - is using the 'sec=sys' security flavour. This may - make migration from legacy NFSv2/v3 systems easier - provided that the server has the appropriate support. - The default is to always enable NFSv4 idmapping. + [NFSv4] When set to the default of '1', this option + ensures that both the RPC level authentication + scheme and the NFS level operations agree to use + numeric uids/gids if the mount is using the + 'sec=sys' security flavour. In effect it is + disabling idmapping, which can make migration from + legacy NFSv2/v3 systems to NFSv4 easier. + Servers that do not support this mode of operation + will be autodetected by the client, and it will fall + back to using the idmapper. + To turn off this behaviour, set the value to '0'. nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take when a NMI is triggered. 
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 41bd67f80d3..277dfaf2e99 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -84,7 +84,7 @@ retry: /* * Turn off NFSv4 uid/gid mapping when using AUTH_SYS */ -static int nfs4_disable_idmapping = 0; +static int nfs4_disable_idmapping = 1; /* * RPC cruft for NFS -- cgit v1.2.3 From 94bf608a18fa4421315275a81c5489734599297a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2012 15:53:24 -0500 Subject: ext4: fix failure exits a) leaking root dentry is bad b) in case of failed ext4_mb_init() we don't want to do ext4_mb_release() c) OTOH, in the same case we *do* want ext4_ext_release() Signed-off-by: Al Viro --- fs/ext4/super.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 64e2529ae9b..ed3ce82e2de 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3733,10 +3733,12 @@ no_journal: } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); + iput(root); goto failed_mount4; } sb->s_root = d_alloc_root(root); if (!sb->s_root) { + iput(root); ext4_msg(sb, KERN_ERR, "get root dentry failed"); ret = -ENOMEM; goto failed_mount4; @@ -3773,7 +3775,7 @@ no_journal: if (err) { ext4_msg(sb, KERN_ERR, "failed to initialize system " "zone (%d)", err); - goto failed_mount4; + goto failed_mount4a; } ext4_ext_init(sb); @@ -3830,13 +3832,14 @@ cantfind_ext4: failed_mount7: ext4_unregister_li_request(sb); failed_mount6: - ext4_ext_release(sb); -failed_mount5: ext4_mb_release(sb); +failed_mount5: + ext4_ext_release(sb); ext4_release_system_zone(sb); -failed_mount4: - iput(root); +failed_mount4a: + dput(sb->s_root); sb->s_root = NULL; +failed_mount4: ext4_msg(sb, KERN_ERR, "mount failed"); destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); failed_mount_wq: -- cgit v1.2.3 From 3c5184ef1216dd476c9c67f22a199d90ac4d5892 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2012 16:34:32 -0500 Subject: ceph: d_alloc_root() may fail ... and ceph_init_dentry(NULL) will oops Signed-off-by: Al Viro --- fs/ceph/super.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 11bd0fc4853..48f61a12af6 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -636,19 +636,26 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, req->r_num_caps = 2; err = ceph_mdsc_do_request(mdsc, NULL, req); if (err == 0) { + struct inode *inode = req->r_target_inode; + req->r_target_inode = NULL; dout("open_root_inode success\n"); - if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && + if (ceph_ino(inode) == CEPH_INO_ROOT && fsc->sb->s_root == NULL) { - root = d_alloc_root(req->r_target_inode); + root = d_alloc_root(inode); + if (!root) { + iput(inode); + root = ERR_PTR(-ENOMEM); + goto out; + } ceph_init_dentry(root); } else { - root = d_obtain_alias(req->r_target_inode); + root = d_obtain_alias(inode); } - req->r_target_inode = NULL; dout("open_root_inode success, root dentry is %p\n", root); } else { root = ERR_PTR(err); } +out: ceph_mdsc_put_request(req); return root; } -- cgit v1.2.3 From b48f03b319ba78f3abf9a7044d1f436d8d90f4f9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 23 Aug 2011 18:56:24 +1000 Subject: dcache: use a dispose list in select_parent select_parent currently abuses the dentry cache LRU to provide cleanup features for child dentries that need to be freed. 
It moves them to the tail of the LRU, then tells shrink_dcache_parent() to calls __shrink_dcache_sb to unconditionally move them to a dispose list (as DCACHE_REFERENCED is ignored). __shrink_dcache_sb() has to relock the dentries to move them off the LRU onto the dispose list, but otherwise does not touch the dentries that select_parent() moved to the tail of the LRU. It then passses the dispose list to shrink_dentry_list() which tries to free the dentries. IOWs, the use of __shrink_dcache_sb() is superfluous - we can build exactly the same list of dentries for disposal directly in select_parent() and call shrink_dentry_list() instead of calling __shrink_dcache_sb() to do that. This means that we avoid long holds on the lru lock walking the LRU moving dentries to the dispose list We also avoid the need to relock each dentry just to move it off the LRU, reducing the numebr of times we lock each dentry to dispose of them in shrink_dcache_parent() from 3 to 2 times. Further, we remove one of the two callers of __shrink_dcache_sb(). This also means that __shrink_dcache_sb can be moved into back into prune_dcache_sb() and we no longer have to handle referenced dentries conditionally, simplifying the code. Signed-off-by: Dave Chinner Signed-off-by: Linus Torvalds Signed-off-by: Al Viro --- fs/dcache.c | 63 +++++++++++++++++++++---------------------------------------- 1 file changed, 21 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 9791b1e7eee..b209d73f9a9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -276,15 +276,15 @@ static void dentry_lru_prune(struct dentry *dentry) } } -static void dentry_lru_move_tail(struct dentry *dentry) +static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) { spin_lock(&dcache_lru_lock); if (list_empty(&dentry->d_lru)) { - list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); + list_add_tail(&dentry->d_lru, list); dentry->d_sb->s_nr_dentry_unused++; dentry_stat.nr_unused++; } else { - list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); + list_move_tail(&dentry->d_lru, list); } spin_unlock(&dcache_lru_lock); } @@ -770,14 +770,18 @@ static void shrink_dentry_list(struct list_head *list) } /** - * __shrink_dcache_sb - shrink the dentry LRU on a given superblock - * @sb: superblock to shrink dentry LRU. - * @count: number of entries to prune - * @flags: flags to control the dentry processing + * prune_dcache_sb - shrink the dcache + * @sb: superblock + * @count: number of entries to try to free + * + * Attempt to shrink the superblock dcache LRU by @count entries. This is + * done when we need more memory an called from the superblock shrinker + * function. * - * If flags contains DCACHE_REFERENCED reference dentries will not be pruned. + * This function may fail to free any resources if all the dentries are in + * use. */ -static void __shrink_dcache_sb(struct super_block *sb, int count, int flags) +void prune_dcache_sb(struct super_block *sb, int count) { struct dentry *dentry; LIST_HEAD(referenced); @@ -796,13 +800,7 @@ relock: goto relock; } - /* - * If we are honouring the DCACHE_REFERENCED flag and the - * dentry has this flag set, don't free it. Clear the flag - * and put it back on the LRU. 
- */ - if (flags & DCACHE_REFERENCED && - dentry->d_flags & DCACHE_REFERENCED) { + if (dentry->d_flags & DCACHE_REFERENCED) { dentry->d_flags &= ~DCACHE_REFERENCED; list_move(&dentry->d_lru, &referenced); spin_unlock(&dentry->d_lock); @@ -821,23 +819,6 @@ relock: shrink_dentry_list(&tmp); } -/** - * prune_dcache_sb - shrink the dcache - * @sb: superblock - * @nr_to_scan: number of entries to try to free - * - * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is - * done when we need more memory an called from the superblock shrinker - * function. - * - * This function may fail to free any resources if all the dentries are in - * use. - */ -void prune_dcache_sb(struct super_block *sb, int nr_to_scan) -{ - __shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED); -} - /** * shrink_dcache_sb - shrink dcache for a superblock * @sb: superblock @@ -1092,7 +1073,7 @@ EXPORT_SYMBOL(have_submounts); * drop the lock and return early due to latency * constraints. */ -static int select_parent(struct dentry * parent) +static int select_parent(struct dentry *parent, struct list_head *dispose) { struct dentry *this_parent; struct list_head *next; @@ -1114,12 +1095,11 @@ resume: spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - /* - * move only zero ref count dentries to the end - * of the unused list for prune_dcache + /* + * move only zero ref count dentries to the dispose list. */ if (!dentry->d_count) { - dentry_lru_move_tail(dentry); + dentry_lru_move_list(dentry, dispose); found++; } else { dentry_lru_del(dentry); @@ -1181,14 +1161,13 @@ rename_retry: * * Prune the dcache to remove unused children of the parent dentry. */ - void shrink_dcache_parent(struct dentry * parent) { - struct super_block *sb = parent->d_sb; + LIST_HEAD(dispose); int found; - while ((found = select_parent(parent)) != 0) - __shrink_dcache_sb(sb, found, 0); + while ((found = select_parent(parent, &dispose)) != 0) + shrink_dentry_list(&dispose); } EXPORT_SYMBOL(shrink_dcache_parent); -- cgit v1.2.3 From adc0e91ab142abe93f5b0d7980ada8a7676231fe Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 8 Jan 2012 16:49:21 -0500 Subject: vfs: new helper - d_make_root() d_alloc_root() with iput() in case of allocation failure... 
Signed-off-by: Al Viro --- fs/dcache.c | 17 +++++++++++++++++ include/linux/dcache.h | 1 + 2 files changed, 18 insertions(+) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index b209d73f9a9..3c6d3113a25 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1440,6 +1440,23 @@ struct dentry * d_alloc_root(struct inode * root_inode) } EXPORT_SYMBOL(d_alloc_root); +struct dentry *d_make_root(struct inode *root_inode) +{ + struct dentry *res = NULL; + + if (root_inode) { + static const struct qstr name = { .name = "/", .len = 1 }; + + res = __d_alloc(root_inode->i_sb, &name); + if (res) + d_instantiate(res, root_inode); + else + iput(root_inode); + } + return res; +} +EXPORT_SYMBOL(d_make_root); + static struct dentry * __d_find_any_alias(struct inode *inode) { struct dentry *alias; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ed9f74f6c51..a47bda5f76d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -249,6 +249,7 @@ extern int d_invalidate(struct dentry *); /* only used at mount-time */ extern struct dentry * d_alloc_root(struct inode *); +extern struct dentry * d_make_root(struct inode *); /* - the ramfs-type tree */ extern void d_genocide(struct dentry *); -- cgit v1.2.3 From 0b2c4e39c014219ef73f05ab580c284bf8e6af0a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 10:46:03 -0500 Subject: coda: deal correctly with allocation failure from coda_cnode_makectl() lookup should fail with ENOMEM, not silently make dentry negative. Switched to saner calling conventions, while we are at it. Signed-off-by: Al Viro --- fs/coda/cnode.c | 21 +++++++++------------ fs/coda/coda_fs_i.h | 2 +- fs/coda/dir.c | 4 ++-- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 6475877b076..8af67c9c47d 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -156,19 +156,16 @@ struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb) } /* the CONTROL inode is made without asking attributes from Venus */ -int coda_cnode_makectl(struct inode **inode, struct super_block *sb) +struct inode *coda_cnode_makectl(struct super_block *sb) { - int error = -ENOMEM; - - *inode = new_inode(sb); - if (*inode) { - (*inode)->i_ino = CTL_INO; - (*inode)->i_op = &coda_ioctl_inode_operations; - (*inode)->i_fop = &coda_ioctl_operations; - (*inode)->i_mode = 0444; - error = 0; + struct inode *inode = new_inode(sb); + if (inode) { + inode->i_ino = CTL_INO; + inode->i_op = &coda_ioctl_inode_operations; + inode->i_fop = &coda_ioctl_operations; + inode->i_mode = 0444; + return inode; } - - return error; + return ERR_PTR(-ENOMEM); } diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h index e35071b1de0..1c17446f1af 100644 --- a/fs/coda/coda_fs_i.h +++ b/fs/coda/coda_fs_i.h @@ -51,7 +51,7 @@ struct coda_file_info { int coda_cnode_make(struct inode **, struct CodaFid *, struct super_block *); struct inode *coda_iget(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr); -int coda_cnode_makectl(struct inode **inode, struct super_block *sb); +struct inode *coda_cnode_makectl(struct super_block *sb); struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb); void coda_replace_fid(struct inode *, struct CodaFid *, struct CodaFid *); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 83d2fd8ec24..df0f9c1b01d 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -111,7 +111,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc /* control object, create 
inode on the fly */ if (coda_isroot(dir) && coda_iscontrol(name, length)) { - error = coda_cnode_makectl(&inode, dir->i_sb); + inode = coda_cnode_makectl(dir->i_sb); type = CODA_NOCACHE; goto exit; } @@ -125,7 +125,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc return ERR_PTR(error); exit: - if (inode && (type & CODA_NOCACHE)) + if (inode && !IS_ERR(inode) && (type & CODA_NOCACHE)) coda_flag_inode(inode, C_VATTR | C_PURGE); return d_splice_alias(inode, entry); -- cgit v1.2.3 From f4947fbce208990266920d51837e4e7ba9779db1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 11:11:49 -0500 Subject: coda: switch coda_cnode_make() to sane API as well, clean coda_lookup() Signed-off-by: Al Viro --- fs/coda/cnode.c | 17 +++++++---------- fs/coda/coda_fs_i.h | 2 +- fs/coda/dir.c | 29 +++++++++++++---------------- fs/coda/inode.c | 10 ++++++---- 4 files changed, 27 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 8af67c9c47d..911cf30d057 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -88,24 +88,21 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, - link the two up if this is needed - fill in the attributes */ -int coda_cnode_make(struct inode **inode, struct CodaFid *fid, struct super_block *sb) +struct inode *coda_cnode_make(struct CodaFid *fid, struct super_block *sb) { struct coda_vattr attr; + struct inode *inode; int error; /* We get inode numbers from Venus -- see venus source */ error = venus_getattr(sb, fid, &attr); - if ( error ) { - *inode = NULL; - return error; - } + if (error) + return ERR_PTR(error); - *inode = coda_iget(sb, fid, &attr); - if ( IS_ERR(*inode) ) { + inode = coda_iget(sb, fid, &attr); + if (IS_ERR(inode)) printk("coda_cnode_make: coda_iget failed\n"); - return PTR_ERR(*inode); - } - return 0; + return inode; } diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h index 1c17446f1af..b24fdfd8a3f 100644 --- a/fs/coda/coda_fs_i.h +++ b/fs/coda/coda_fs_i.h @@ -49,7 +49,7 @@ struct coda_file_info { #define C_DYING 0x4 /* from venus (which died) */ #define C_PURGE 0x8 -int coda_cnode_make(struct inode **, struct CodaFid *, struct super_block *); +struct inode *coda_cnode_make(struct CodaFid *, struct super_block *); struct inode *coda_iget(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr); struct inode *coda_cnode_makectl(struct super_block *sb); struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index df0f9c1b01d..17751582906 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -96,12 +96,11 @@ const struct file_operations coda_dir_operations = { /* access routines: lookup, readlink, permission */ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd) { - struct inode *inode = NULL; - struct CodaFid resfid = { { 0, } }; - int type = 0; - int error = 0; + struct super_block *sb = dir->i_sb; const char *name = entry->d_name.name; size_t length = entry->d_name.len; + struct inode *inode; + int type = 0; if (length > CODA_MAXNAMLEN) { printk(KERN_ERR "name too long: lookup, %s (%*s)\n", @@ -111,23 +110,21 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc /* control object, create inode on the fly */ if (coda_isroot(dir) && coda_iscontrol(name, length)) { - inode = coda_cnode_makectl(dir->i_sb); + inode = coda_cnode_makectl(sb); type = CODA_NOCACHE; - goto exit; + } else { + struct CodaFid 
fid = { { 0, } }; + int error = venus_lookup(sb, coda_i2f(dir), name, length, + &type, &fid); + inode = !error ? coda_cnode_make(&fid, sb) : ERR_PTR(error); } - error = venus_lookup(dir->i_sb, coda_i2f(dir), name, length, - &type, &resfid); - if (!error) - error = coda_cnode_make(&inode, &resfid, dir->i_sb); - - if (error && error != -ENOENT) - return ERR_PTR(error); - -exit: - if (inode && !IS_ERR(inode) && (type & CODA_NOCACHE)) + if (!IS_ERR(inode) && (type & CODA_NOCACHE)) coda_flag_inode(inode, C_VATTR | C_PURGE); + if (inode == ERR_PTR(-ENOENT)) + inode = NULL; + return d_splice_alias(inode, entry); } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 1c08a8cd673..5e2e1b3f068 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -204,10 +204,12 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) printk("coda_read_super: rootfid is %s\n", coda_f2s(&fid)); /* make root inode */ - error = coda_cnode_make(&root, &fid, sb); - if ( error || !root ) { - printk("Failure of coda_cnode_make for root: error %d\n", error); - goto error; + root = coda_cnode_make(&fid, sb); + if (IS_ERR(root)) { + error = PTR_ERR(root); + printk("Failure of coda_cnode_make for root: error %d\n", error); + root = NULL; + goto error; } printk("coda_read_super: rootinode is %ld dev %s\n", -- cgit v1.2.3 From d50f2ab6f050311dbf7b8f5501b25f0bf64a439b Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 10 Jan 2012 11:51:10 -0500 Subject: ext4: fix undefined behavior in ext4_fill_flex_info() Commit 503358ae01b70ce6909d19dd01287093f6b6271c ("ext4: avoid divide by zero when trying to mount a corrupted file system") fixes CVE-2009-4307 by performing a sanity check on s_log_groups_per_flex, since it can be set to a bogus value by an attacker. sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; groups_per_flex = 1 << sbi->s_log_groups_per_flex; if (groups_per_flex < 2) { ... } This patch fixes two potential issues in the previous commit. 1) The sanity check might only work on architectures like PowerPC. On x86, 5 bits are used for the shifting amount. That means, given a large s_log_groups_per_flex value like 36, groups_per_flex = 1 << 36 is essentially 1 << 4 = 16, rather than 0. This will bypass the check, leaving s_log_groups_per_flex and groups_per_flex inconsistent. 2) The sanity check relies on undefined behavior, i.e., oversized shift. A standard-confirming C compiler could rewrite the check in unexpected ways. Consider the following equivalent form, assuming groups_per_flex is unsigned for simplicity. groups_per_flex = 1 << sbi->s_log_groups_per_flex; if (groups_per_flex == 0 || groups_per_flex == 1) { We compile the code snippet using Clang 3.0 and GCC 4.6. Clang will completely optimize away the check groups_per_flex == 0, leaving the patched code as vulnerable as the original. GCC keeps the check, but there is no guarantee that future versions will do the same. 
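A minimal userspace sketch of the pattern the fix adopts, with invented function and variable names (this is not the ext4 code): validate the shift exponent itself, so an oversized shift is never evaluated and there is nothing for a compiler to legally optimize away.

#include <stdio.h>

/*
 * Sketch only: never evaluate 1 << n for n >= 32 on a 32-bit quantity.
 * Checking the exponent avoids the undefined shift entirely.
 */
static int groups_per_flex_from_log(unsigned int log_groups_per_flex,
				    unsigned int *groups_per_flex)
{
	if (log_groups_per_flex < 1 || log_groups_per_flex > 31)
		return -1;		/* reject corrupt on-disk values */
	*groups_per_flex = 1U << log_groups_per_flex;
	return 0;
}

int main(void)
{
	unsigned int gpf;

	if (groups_per_flex_from_log(36, &gpf) < 0)
		printf("rejected bogus exponent\n");
	if (groups_per_flex_from_log(4, &gpf) == 0)
		printf("groups_per_flex = %u\n", gpf);	/* prints 16 */
	return 0;
}

Compiled as an ordinary C program, the bogus value 36 is rejected up front instead of silently aliasing to 1 << 4.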
Signed-off-by: Xi Wang Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/super.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 36570b7af7c..108c3af8617 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2006,17 +2006,16 @@ static int ext4_fill_flex_info(struct super_block *sb) struct ext4_group_desc *gdp = NULL; ext4_group_t flex_group_count; ext4_group_t flex_group; - int groups_per_flex = 0; + unsigned int groups_per_flex = 0; size_t size; int i; sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; - groups_per_flex = 1 << sbi->s_log_groups_per_flex; - - if (groups_per_flex < 2) { + if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { sbi->s_log_groups_per_flex = 0; return 1; } + groups_per_flex = 1 << sbi->s_log_groups_per_flex; /* We allocate both existing and potentially added groups */ flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + -- cgit v1.2.3 From 3d8eb7a94e8f25a33362f708974ac7daae9e84f8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 11 Nov 2011 09:48:53 -0800 Subject: ceph: remove unnecessary d_fsdata conditional checks We now set d_fsdata unconditionally on all dentries prior to setting up the d_ops, so all of these checks are unnecessary. Signed-off-by: Sage Weil --- fs/ceph/dir.c | 48 ++++++++++++++++++++---------------------------- fs/ceph/mds_client.c | 4 ++-- 2 files changed, 22 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 98954003a8d..a421555b229 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -973,7 +973,7 @@ static int dentry_lease_is_valid(struct dentry *dentry) spin_lock(&dentry->d_lock); di = ceph_dentry(dentry); - if (di && di->lease_session) { + if (di->lease_session) { s = di->lease_session; spin_lock(&s->s_cap_lock); gen = s->s_cap_gen; @@ -1072,13 +1072,11 @@ static void ceph_d_release(struct dentry *dentry) struct ceph_dentry_info *di = ceph_dentry(dentry); dout("d_release %p\n", dentry); - if (di) { - ceph_dentry_lru_del(dentry); - if (di->lease_session) - ceph_put_mds_session(di->lease_session); - kmem_cache_free(ceph_dentry_cachep, di); - dentry->d_fsdata = NULL; - } + ceph_dentry_lru_del(dentry); + if (di->lease_session) + ceph_put_mds_session(di->lease_session); + kmem_cache_free(ceph_dentry_cachep, di); + dentry->d_fsdata = NULL; } static int ceph_snapdir_d_revalidate(struct dentry *dentry, @@ -1259,13 +1257,11 @@ void ceph_dentry_lru_add(struct dentry *dn) dout("dentry_lru_add %p %p '%.*s'\n", di, dn, dn->d_name.len, dn->d_name.name); - if (di) { - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; - spin_lock(&mdsc->dentry_lru_lock); - list_add_tail(&di->lru, &mdsc->dentry_lru); - mdsc->num_dentry++; - spin_unlock(&mdsc->dentry_lru_lock); - } + mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + spin_lock(&mdsc->dentry_lru_lock); + list_add_tail(&di->lru, &mdsc->dentry_lru); + mdsc->num_dentry++; + spin_unlock(&mdsc->dentry_lru_lock); } void ceph_dentry_lru_touch(struct dentry *dn) @@ -1275,12 +1271,10 @@ void ceph_dentry_lru_touch(struct dentry *dn) dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, dn->d_name.len, dn->d_name.name, di->offset); - if (di) { - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; - spin_lock(&mdsc->dentry_lru_lock); - list_move_tail(&di->lru, &mdsc->dentry_lru); - spin_unlock(&mdsc->dentry_lru_lock); - } + mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + spin_lock(&mdsc->dentry_lru_lock); + list_move_tail(&di->lru, 
&mdsc->dentry_lru); + spin_unlock(&mdsc->dentry_lru_lock); } void ceph_dentry_lru_del(struct dentry *dn) @@ -1290,13 +1284,11 @@ void ceph_dentry_lru_del(struct dentry *dn) dout("dentry_lru_del %p %p '%.*s'\n", di, dn, dn->d_name.len, dn->d_name.name); - if (di) { - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; - spin_lock(&mdsc->dentry_lru_lock); - list_del_init(&di->lru); - mdsc->num_dentry--; - spin_unlock(&mdsc->dentry_lru_lock); - } + mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + spin_lock(&mdsc->dentry_lru_lock); + list_del_init(&di->lru); + mdsc->num_dentry--; + spin_unlock(&mdsc->dentry_lru_lock); } /* diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6203d805eb4..23ab6a3f182 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2772,7 +2772,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, di = ceph_dentry(dentry); switch (h->action) { case CEPH_MDS_LEASE_REVOKE: - if (di && di->lease_session == session) { + if (di->lease_session == session) { if (ceph_seq_cmp(di->lease_seq, seq) > 0) h->seq = cpu_to_le32(di->lease_seq); __ceph_mdsc_drop_dentry_lease(dentry); @@ -2781,7 +2781,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, break; case CEPH_MDS_LEASE_RENEW: - if (di && di->lease_session == session && + if (di->lease_session == session && di->lease_gen == session->s_cap_gen && di->lease_renew_from && di->lease_renew_after == 0) { -- cgit v1.2.3 From b8cd952b51034ad9f20ca147507ee68dc641c98c Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Tue, 13 Dec 2011 09:56:30 -0800 Subject: ceph: dereference pointer after checking for NULL moved dereference after BUG_ON Signed-off-by: Yehuda Sadeh --- fs/ceph/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 87fb132fb33..f556e76c72e 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -851,11 +851,12 @@ static void ceph_set_dentry_offset(struct dentry *dn) { struct dentry *dir = dn->d_parent; struct inode *inode = dir->d_inode; - struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_inode_info *ci; struct ceph_dentry_info *di; BUG_ON(!inode); + ci = ceph_inode(inode); di = ceph_dentry(dn); spin_lock(&ci->i_ceph_lock); -- cgit v1.2.3 From ee6b1baf67591b6d7ce1a6a07544343433d5ec9e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 3 Jan 2012 08:20:40 -0800 Subject: ceph: avoid useless dget/dput in encode_fh Nothing we do here sleeps, so just do it under d_lock and avoid the dget/ dput entirely. 
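The trade-off behind this change can be sketched as a toy userspace model with invented names (nothing below is ceph or VFS code, and refcount handling is simplified): if the caller may block after dropping the lock, the object has to be pinned with a reference first; if all the work is short and non-sleeping, it can simply be done while the lock is held.

#include <pthread.h>
#include <stdio.h>

/* Toy analogue of a dentry: ->parent is protected by ->lock. */
struct node {
	pthread_mutex_t lock;
	int refcount;
	struct node *parent;
};

/* Pattern 1: the caller may block later, so pin the parent before
 * dropping the lock and release the reference when done. */
static struct node *grab_parent(struct node *n)
{
	struct node *p;

	pthread_mutex_lock(&n->lock);
	p = n->parent;
	p->refcount++;		/* keep the parent alive (simplified) */
	pthread_mutex_unlock(&n->lock);
	return p;		/* caller drops the reference later */
}

/* Pattern 2 (the direction this patch takes): everything we need from
 * the parent is quick and non-blocking, so just read it under the lock. */
static void print_parent(struct node *n)
{
	pthread_mutex_lock(&n->lock);
	printf("parent at %p\n", (void *)n->parent);
	pthread_mutex_unlock(&n->lock);
}

int main(void)
{
	struct node root = { PTHREAD_MUTEX_INITIALIZER, 1, NULL };
	struct node child = { PTHREAD_MUTEX_INITIALIZER, 1, &root };
	struct node *p = grab_parent(&child);

	print_parent(&child);
	p->refcount--;		/* release the pinned parent */
	return 0;
}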
Reported-by: Al Viro Signed-off-by: Sage Weil --- fs/ceph/export.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 9fbcdecaacc..fbb2a643ef1 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -56,9 +56,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, return -EINVAL; spin_lock(&dentry->d_lock); - parent = dget(dentry->d_parent); - spin_unlock(&dentry->d_lock); - + parent = dentry->d_parent; if (*max_len >= connected_handle_length) { dout("encode_fh %p connectable\n", dentry); cfh->ino = ceph_ino(dentry->d_inode); @@ -81,7 +79,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, *max_len = handle_length; type = 255; } - dput(parent); + spin_unlock(&dentry->d_lock); return type; } -- cgit v1.2.3 From 2ff179e650e95c2b21841b21dc46dc2edefd04cd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 3 Jan 2012 10:09:07 -0800 Subject: ceph: avoid iput() while holding spinlock in ceph_dir_fsync ceph_mdsc_put_request() can call iput(), which can sleep. Don't do that. Fixes: #1812 Signed-off-by: Sage Weil --- fs/ceph/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index a421555b229..974ef1e4d26 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1218,6 +1218,7 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, do { ceph_mdsc_get_request(req); spin_unlock(&ci->i_unsafe_lock); + dout("dir_fsync %p wait on tid %llu (until %llu)\n", inode, req->r_tid, last_tid); if (req->r_timeout) { @@ -1230,9 +1231,9 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, } else { wait_for_completion(&req->r_safe_completion); } - spin_lock(&ci->i_unsafe_lock); ceph_mdsc_put_request(req); + spin_lock(&ci->i_unsafe_lock); if (ret || list_empty(head)) break; req = list_entry(head->next, -- cgit v1.2.3 From eaf5f9073533cde21c7121c136f1c3f072d9cf59 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 10 Jan 2012 18:22:25 +0100 Subject: fix shrink_dcache_parent() livelock Two (or more) concurrent calls of shrink_dcache_parent() on the same dentry may cause shrink_dcache_parent() to loop forever. Here's what appears to happen: 1 - CPU0: select_parent(P) finds C and puts it on dispose list, returns 1 2 - CPU1: select_parent(P) locks P->d_lock 3 - CPU0: shrink_dentry_list() locks C->d_lock dentry_kill(C) tries to lock P->d_lock but fails, unlocks C->d_lock 4 - CPU1: select_parent(P) locks C->d_lock, moves C from dispose list being processed on CPU0 to the new dispose list, returns 1 5 - CPU0: shrink_dentry_list() finds dispose list empty, returns 6 - Goto 2 with CPU0 and CPU1 switched Basically select_parent() steals the dentry from shrink_dentry_list() and thinks it found a new one, causing shrink_dentry_list() to think it's making progress and loop over and over. One way to trigger this is to make udev calls stat() on the sysfs file while it is going away. 
Having a file in /lib/udev/rules.d/ with only this one rule seems to do the trick:

ATTR{vendor}=="0x8086", ATTR{device}=="0x10ca", ENV{PCI_SLOT_NAME}="%k", ENV{MATCHADDR}="$attr{address}", RUN+="/bin/true"

Then execute the following loop:

while true; do
	echo -bond0 > /sys/class/net/bonding_masters
	echo +bond0 > /sys/class/net/bonding_masters
	echo -bond1 > /sys/class/net/bonding_masters
	echo +bond1 > /sys/class/net/bonding_masters
done

One fix would be to check all callers and prevent concurrent calls to shrink_dcache_parent().  But I think a better solution is to stop the stealing behavior.

This patch adds a new dentry flag that is set when the dentry is added to the dispose list.  The flag is cleared in dentry_lru_del() in case the dentry gets a new reference just before being pruned.

If the dentry has this flag, select_parent() will skip it and let shrink_dentry_list() retry pruning it.  With select_parent() skipping those dentries there will not be the appearance of progress (new dentries found) when there is none, hence shrink_dcache_parent() will not loop forever.

The flag is also set in prune_dcache_sb() for consistency, as suggested by Linus.
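A schematic, single-threaded userspace model of the new selection rule, with invented names and no locking (it is not the dcache code): entries already claimed for disposal are skipped rather than stolen, so a scanner cannot mistake someone else's pending work for progress of its own.

#include <stdio.h>

#define FLAG_SHRINK_LIST 0x1	/* analogue of DCACHE_SHRINK_LIST */

struct entry {
	const char *name;
	int count;		/* analogue of d_count */
	unsigned int flags;
};

/* Analogue of the select_parent() loop after the patch: a claimed entry
 * is neither counted as "found" nor moved to the dispose list. */
static int select_for_disposal(struct entry *e, int n)
{
	int found = 0;

	for (int i = 0; i < n; i++) {
		if (e[i].count) {
			/* still referenced: drop from LRU, clear the flag */
			e[i].flags &= ~FLAG_SHRINK_LIST;
		} else if (!(e[i].flags & FLAG_SHRINK_LIST)) {
			e[i].flags |= FLAG_SHRINK_LIST;	/* claim it */
			found++;
			printf("dispose %s\n", e[i].name);
		}
		/* else: another shrinker owns it -- skip, do not steal */
	}
	return found;
}

int main(void)
{
	struct entry entries[] = {
		{ "A", 0, 0 },			/* free: gets claimed */
		{ "B", 0, FLAG_SHRINK_LIST },	/* claimed elsewhere: skipped */
		{ "C", 1, FLAG_SHRINK_LIST },	/* re-referenced: flag cleared */
	};

	printf("found %d\n", select_for_disposal(entries, 3));
	return 0;
}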
We'll be coming back to find diff --git a/include/linux/dcache.h b/include/linux/dcache.h index a47bda5f76d..31f73220e7d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -203,6 +203,7 @@ struct dentry_operations { #define DCACHE_CANT_MOUNT 0x0100 #define DCACHE_GENOCIDE 0x0200 +#define DCACHE_SHRINK_LIST 0x0400 #define DCACHE_NFSFS_RENAMED 0x1000 /* this dentry has been "silly renamed" and has to be deleted on the last -- cgit v1.2.3 From ace8577aeb438025ecf642f5eda3aa551d251951 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 10 Jan 2012 02:43:59 +0300 Subject: block_dev: Suppress bdev_cache_init() kmemleak warninig Kmemleak reports the following warning in bdev_cache_init() [ 0.003738] kmemleak: Object 0xffff880153035200 (size 256): [ 0.003823] kmemleak: comm "swapper/0", pid 0, jiffies 4294667299 [ 0.003909] kmemleak: min_count = 1 [ 0.003988] kmemleak: count = 0 [ 0.004066] kmemleak: flags = 0x1 [ 0.004144] kmemleak: checksum = 0 [ 0.004224] kmemleak: backtrace: [ 0.004303] [] kmemleak_alloc+0x21/0x3e [ 0.004446] [] kmem_cache_alloc+0xca/0x1dc [ 0.004592] [] alloc_vfsmnt+0x1f/0x198 [ 0.004736] [] vfs_kern_mount+0x36/0xd2 [ 0.004879] [] kern_mount_data+0x18/0x32 [ 0.005025] [] bdev_cache_init+0x51/0x81 [ 0.005169] [] vfs_caches_init+0x101/0x10d [ 0.005313] [] start_kernel+0x344/0x383 [ 0.005456] [] x86_64_start_reservations+0xae/0xb2 [ 0.005602] [] x86_64_start_kernel+0x102/0x111 [ 0.005747] [] 0xffffffffffffffff [ 0.008653] kmemleak: Trying to color unknown object at 0xffff880153035220 as Grey [ 0.008754] Pid: 0, comm: swapper/0 Not tainted 3.3.0-rc0-dbg-04200-g8180888-dirty #888 [ 0.008856] Call Trace: [ 0.008934] [] ? find_and_get_object+0x44/0x118 [ 0.009023] [] paint_ptr+0x57/0x8f [ 0.009109] [] kmemleak_not_leak+0x23/0x42 [ 0.009195] [] bdev_cache_init+0x72/0x81 [ 0.009282] [] vfs_caches_init+0x101/0x10d [ 0.009368] [] start_kernel+0x344/0x383 [ 0.009466] [] x86_64_start_reservations+0xae/0xb2 [ 0.009555] [] ? early_idt_handlers+0x140/0x140 [ 0.009643] [] x86_64_start_kernel+0x102/0x111 due to attempt to mark pointer to `struct vfsmount' as a gray object, which is embedded into `struct mount' returned from alloc_vfsmnt(). Make `bd_mnt' static, avoiding need to tell kmemleak to mark it gray, as suggested by Al Viro. Signed-off-by: Sergey Senozhatsky Signed-off-by: Al Viro --- fs/block_dev.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 69a5b6fbee2..afe74dda632 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include "internal.h" @@ -521,7 +520,7 @@ static struct super_block *blockdev_superblock __read_mostly; void __init bdev_cache_init(void) { int err; - struct vfsmount *bd_mnt; + static struct vfsmount *bd_mnt; bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| @@ -533,12 +532,7 @@ void __init bdev_cache_init(void) bd_mnt = kern_mount(&bd_type); if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); - /* - * This vfsmount structure is only used to obtain the - * blockdev_superblock, so tell kmemleak not to report it. 
- */ - kmemleak_not_leak(bd_mnt); - blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ + blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ } /* -- cgit v1.2.3 From b3f2a92447b8443360ac117a3d7c06689562a70c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 17:48:52 -0500 Subject: hfsplus: creation of hidden dir on mount can fail Signed-off-by: Al Viro --- fs/hfsplus/super.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index edf0a801446..427682ca9e4 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -499,9 +499,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!sbi->hidden_dir) { mutex_lock(&sbi->vh_mutex); sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); - hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str, - sbi->hidden_dir); + if (!sbi->hidden_dir) { + mutex_unlock(&sbi->vh_mutex); + err = -ENOMEM; + goto out_put_root; + } + err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root, + &str, sbi->hidden_dir); mutex_unlock(&sbi->vh_mutex); + if (err) + goto out_put_hidden_dir; hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY); -- cgit v1.2.3 From 5f8aefd44e64ed2f6950a1dcc77309b7dd9979f4 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 10 Jan 2012 15:07:18 -0800 Subject: mm: account reaped page cache on inode cache pruning Inode cache pruning indirectly reclaims page-cache by invalidating mapping pages. Let's account them into reclaim-state to notice this progress in memory reclaimer. Signed-off-by: Konstantin Khlebnikov Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 87535753ab0..4fa4f0916af 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -776,6 +776,8 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan) else __count_vm_events(PGINODESTEAL, reap); spin_unlock(&sb->s_inode_lru_lock); + if (current->reclaim_state) + current->reclaim_state->reclaimed_slab += reap; dispose_list(&freeable); } -- cgit v1.2.3 From e3a41a5ba9c2ab988b9f1442925109dca2382fd9 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 10 Jan 2012 15:07:55 -0800 Subject: btrfs: pass __GFP_WRITE for buffered write page allocations Tell the page allocator that pages allocated for a buffered write are expected to become dirty soon. Signed-off-by: Johannes Weiner Reviewed-by: Rik van Riel Acked-by: Mel Gorman Cc: Minchan Kim Cc: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Christoph Hellwig Cc: Wu Fengguang Cc: Dave Chinner Cc: Jan Kara Cc: Shaohua Li Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 97fbe939c05..20375e6691c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1081,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, again: for (i = 0; i < num_pages; i++) { pages[i] = find_or_create_page(inode->i_mapping, index + i, - mask); + mask | __GFP_WRITE); if (!pages[i]) { faili = i - 1; err = -ENOMEM; -- cgit v1.2.3 From 43d2b113241d6797b890318767e0af78e313414b Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 10 Jan 2012 15:08:09 -0800 Subject: tracepoint: add tracepoints for debugging oom_score_adj oom_score_adj is used for guarding processes from OOM-Killer. 
One of problem is that it's inherited at fork(). When a daemon set oom_score_adj and make children, it's hard to know where the value is set. This patch adds some tracepoints useful for debugging. This patch adds 3 trace points. - creating new task - renaming a task (exec) - set oom_score_adj To debug, users need to enable some trace pointer. Maybe filtering is useful as # EVENT=/sys/kernel/debug/tracing/events/task/ # echo "oom_score_adj != 0" > $EVENT/task_newtask/filter # echo "oom_score_adj != 0" > $EVENT/task_rename/filter # echo 1 > $EVENT/enable # EVENT=/sys/kernel/debug/tracing/events/oom/ # echo 1 > $EVENT/enable output will be like this. # grep oom /sys/kernel/debug/tracing/trace bash-7699 [007] d..3 5140.744510: oom_score_adj_update: pid=7699 comm=bash oom_score_adj=-1000 bash-7699 [007] ...1 5151.818022: task_newtask: pid=7729 comm=bash clone_flags=1200011 oom_score_adj=-1000 ls-7729 [003] ...2 5151.818504: task_rename: pid=7729 oldcomm=bash newcomm=ls oom_score_adj=-1000 bash-7699 [002] ...1 5175.701468: task_newtask: pid=7730 comm=bash clone_flags=1200011 oom_score_adj=-1000 grep-7730 [007] ...2 5175.701993: task_rename: pid=7730 oldcomm=bash newcomm=grep oom_score_adj=-1000 Signed-off-by: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 4 +++ fs/proc/base.c | 3 +++ include/trace/events/oom.h | 33 ++++++++++++++++++++++++ include/trace/events/task.h | 61 +++++++++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 6 +++++ mm/oom_kill.c | 6 +++++ 6 files changed, 113 insertions(+) create mode 100644 include/trace/events/oom.h create mode 100644 include/trace/events/task.h (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 3f64b9f26e7..aeb135c7ff5 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -59,6 +59,8 @@ #include #include #include + +#include #include "internal.h" int core_uses_pid; @@ -1054,6 +1056,8 @@ void set_task_comm(struct task_struct *tsk, char *buf) { task_lock(tsk); + trace_task_rename(tsk, buf); + /* * Threads may access current->comm without holding * the task lock, so write the string carefully. diff --git a/fs/proc/base.c b/fs/proc/base.c index a1dddda999f..1aab5fe05a1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -86,6 +86,7 @@ #ifdef CONFIG_HARDWALL #include #endif +#include #include "internal.h" /* NOTE: @@ -1010,6 +1011,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, else task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; + trace_oom_score_adj_update(task); err_sighand: unlock_task_sighand(task, &flags); err_task_lock: @@ -1097,6 +1099,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, task->signal->oom_score_adj = oom_score_adj; if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) task->signal->oom_score_adj_min = oom_score_adj; + trace_oom_score_adj_update(task); /* * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is * always attainable. 
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h new file mode 100644 index 00000000000..dd4ba3b9200 --- /dev/null +++ b/include/trace/events/oom.h @@ -0,0 +1,33 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM oom + +#if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_OOM_H +#include + +TRACE_EVENT(oom_score_adj_update, + + TP_PROTO(struct task_struct *task), + + TP_ARGS(task), + + TP_STRUCT__entry( + __field( pid_t, pid) + __array( char, comm, TASK_COMM_LEN ) + __field( int, oom_score_adj) + ), + + TP_fast_assign( + __entry->pid = task->pid; + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->oom_score_adj = task->signal->oom_score_adj; + ), + + TP_printk("pid=%d comm=%s oom_score_adj=%d", + __entry->pid, __entry->comm, __entry->oom_score_adj) +); + +#endif + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/task.h b/include/trace/events/task.h new file mode 100644 index 00000000000..b53add02e92 --- /dev/null +++ b/include/trace/events/task.h @@ -0,0 +1,61 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM task + +#if !defined(_TRACE_TASK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TASK_H +#include + +TRACE_EVENT(task_newtask, + + TP_PROTO(struct task_struct *task, unsigned long clone_flags), + + TP_ARGS(task, clone_flags), + + TP_STRUCT__entry( + __field( pid_t, pid) + __array( char, comm, TASK_COMM_LEN) + __field( unsigned long, clone_flags) + __field( int, oom_score_adj) + ), + + TP_fast_assign( + __entry->pid = task->pid; + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->clone_flags = clone_flags; + __entry->oom_score_adj = task->signal->oom_score_adj; + ), + + TP_printk("pid=%d comm=%s clone_flags=%lx oom_score_adj=%d", + __entry->pid, __entry->comm, + __entry->clone_flags, __entry->oom_score_adj) +); + +TRACE_EVENT(task_rename, + + TP_PROTO(struct task_struct *task, char *comm), + + TP_ARGS(task, comm), + + TP_STRUCT__entry( + __field( pid_t, pid) + __array( char, oldcomm, TASK_COMM_LEN) + __array( char, newcomm, TASK_COMM_LEN) + __field( int, oom_score_adj) + ), + + TP_fast_assign( + __entry->pid = task->pid; + memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN); + memcpy(entry->newcomm, comm, TASK_COMM_LEN); + __entry->oom_score_adj = task->signal->oom_score_adj; + ), + + TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%d", + __entry->pid, __entry->oldcomm, + __entry->newcomm, __entry->oom_score_adj) +); + +#endif + +/* This part must be outside protection */ +#include diff --git a/kernel/fork.c b/kernel/fork.c index b00711ce7c1..5e1391b5ade 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -76,6 +76,9 @@ #include +#define CREATE_TRACE_POINTS +#include + /* * Protected counters by write_lock_irq(&tasklist_lock) */ @@ -1370,6 +1373,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); perf_event_fork(p); + + trace_task_newtask(p, clone_flags); + return p; bad_fork_free_pid: diff --git a/mm/oom_kill.c b/mm/oom_kill.c index eeb27e27dce..7c122faa05c 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -33,6 +33,10 @@ #include #include #include +#include + +#define CREATE_TRACE_POINTS +#include int sysctl_panic_on_oom; int sysctl_oom_kill_allocating_task; @@ -55,6 +59,7 @@ void compare_swap_oom_score_adj(int old_val, int new_val) spin_lock_irq(&sighand->siglock); if (current->signal->oom_score_adj == old_val) current->signal->oom_score_adj = new_val; + trace_oom_score_adj_update(current); 
spin_unlock_irq(&sighand->siglock); } @@ -74,6 +79,7 @@ int test_set_oom_score_adj(int new_val) spin_lock_irq(&sighand->siglock); old_val = current->signal->oom_score_adj; current->signal->oom_score_adj = new_val; + trace_oom_score_adj_update(current); spin_unlock_irq(&sighand->siglock); return old_val; -- cgit v1.2.3 From e39f560239984c3098237ad94c9449b1494163f8 Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 10 Jan 2012 15:10:21 -0800 Subject: fs: binfmt_elf: create Kconfig variable for PIE randomization Randomization of PIE load address is hard coded in binfmt_elf.c for X86 and ARM. Create a new Kconfig variable (CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE) for this and use it instead. Thus architecture specific policy is pushed out of the generic binfmt_elf.c and into the architecture Kconfig files. X86 and ARM Kconfigs are modified to select the new variable so there is no change in behavior. A follow on patch will select it for MIPS too. Signed-off-by: David Daney Cc: Russell King Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Alexander Viro Acked-by: H. Peter Anvin Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 1 + arch/x86/Kconfig | 1 + fs/Kconfig.binfmt | 3 +++ fs/binfmt_elf.c | 2 +- 4 files changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9d66dfc33a5..98a6459cd39 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -16,6 +16,7 @@ config ARM select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) + select ARCH_BINFMT_ELF_RANDOMIZE_PIE select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d2a69dd36d..d6ddc0bfe36 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -62,6 +62,7 @@ config X86 select ANON_INODES select HAVE_ARCH_KMEMCHECK select HAVE_USER_RETURN_NOTIFIER + select ARCH_BINFMT_ELF_RANDOMIZE_PIE select HAVE_ARCH_JUMP_LABEL select HAVE_TEXT_POKE_SMP select HAVE_GENERIC_HARDIRQS diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 79e2ca7973b..e95d1b64082 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -27,6 +27,9 @@ config COMPAT_BINFMT_ELF bool depends on COMPAT && BINFMT_ELF +config ARCH_BINFMT_ELF_RANDOMIZE_PIE + bool + config BINFMT_ELF_FDPIC bool "Kernel support for FDPIC ELF binaries" default y diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 21ac5ee4b43..bcb884e2d61 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -794,7 +794,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * default mmap base, as well as whatever program they * might try to exec. This is because the brk will * follow the loader, and is not movable. */ -#if defined(CONFIG_X86) || defined(CONFIG_ARM) +#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE /* Memory randomization might have been switched off * in runtime via sysctl. 
* If that is the case, retain the original non-zero -- cgit v1.2.3 From b18c1c6e0c90cbcd38ba879bd63a44c94e4f7301 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 10 Jan 2012 15:11:05 -0800 Subject: reiserfs: delete comments referring to the BKL Signed-off-by: Davidlohr Bueso Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/journal.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index eb711060a6f..cce8e8710df 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2896,14 +2896,13 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th, journal->j_cnode_free < (journal->j_trans_max * 3)) { return 1; } - /* protected by the BKL here */ + journal->j_len_alloc += new_alloc; th->t_blocks_allocated += new_alloc ; return 0; } -/* this must be called inside a transaction, and requires the -** kernel_lock to be held +/* this must be called inside a transaction */ void reiserfs_block_writes(struct reiserfs_transaction_handle *th) { @@ -2914,8 +2913,7 @@ void reiserfs_block_writes(struct reiserfs_transaction_handle *th) return; } -/* this must be called without a transaction started, and does not -** require BKL +/* this must be called without a transaction started */ void reiserfs_allow_writes(struct super_block *s) { @@ -2924,8 +2922,7 @@ void reiserfs_allow_writes(struct super_block *s) wake_up(&journal->j_join_wait); } -/* this must be called without a transaction started, and does not -** require BKL +/* this must be called without a transaction started */ void reiserfs_wait_on_write_block(struct super_block *s) { -- cgit v1.2.3 From f32485be8397ad811312bc055d2e2a5906bc7576 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 10 Jan 2012 15:11:07 -0800 Subject: reiserfs: delay reiserfs lock until journal initialization In the mount path, transactions that are made before journal initialization don't involve the filesystem. We can delay the reiserfs lock until we play with the journal. Signed-off-by: Frederic Weisbecker Cc: Al Viro Cc: Christoph Hellwig Cc: Jeff Mahoney Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/bitmap.c | 3 --- fs/reiserfs/super.c | 43 ++++++++++++++++++++++++------------------- 2 files changed, 24 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index a945cd26522..70de42f09f1 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1364,10 +1364,7 @@ int reiserfs_init_bitmap_cache(struct super_block *sb) struct reiserfs_bitmap_info *bitmap; unsigned int bmap_nr = reiserfs_bmap_count(sb); - /* Avoid lock recursion in fault case */ - reiserfs_write_unlock(sb); bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); - reiserfs_write_lock(sb); if (bitmap == NULL) return -ENOMEM; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 1d42e707d5f..620dd5d9e1b 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1746,22 +1746,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) mutex_init(&REISERFS_SB(s)->lock); REISERFS_SB(s)->lock_depth = -1; - /* - * This function is called with the bkl, which also was the old - * locking used here. - * do_journal_begin() will soon check if we hold the lock (ie: was the - * bkl). 
This is likely because do_journal_begin() has several another - * callers because at this time, it doesn't seem to be necessary to - * protect against anything. - * Anyway, let's be conservative and lock for now. - */ - reiserfs_write_lock(s); - jdev_name = NULL; if (reiserfs_parse_options (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, &commit_max_age, qf_names, &qfmt) == 0) { - goto error; + goto error_unlocked; } if (jdev_name && jdev_name[0]) { REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL); @@ -1777,7 +1766,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (blocks) { SWARN(silent, s, "jmacd-7", "resize option for remount only"); - goto error; + goto error_unlocked; } /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ @@ -1787,7 +1776,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", reiserfs_bdevname(s)); - goto error; + goto error_unlocked; } rs = SB_DISK_SUPER_BLOCK(s); @@ -1803,7 +1792,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) "or increase size of your LVM partition"); SWARN(silent, s, "", "Or may be you forgot to " "reboot after fdisk when it told you to"); - goto error; + goto error_unlocked; } sbi->s_mount_state = SB_REISERFS_STATE(s); @@ -1811,8 +1800,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if ((errval = reiserfs_init_bitmap_cache(s))) { SWARN(silent, s, "jmacd-8", "unable to read bitmap"); - goto error; + goto error_unlocked; } + errval = -EINVAL; #ifdef CONFIG_REISERFS_CHECK SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON"); @@ -1835,6 +1825,17 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (reiserfs_barrier_flush(s)) { printk("reiserfs: using flush barriers\n"); } + + /* + * This path assumed to be called with the BKL in the old times. + * Now we have inherited the big reiserfs lock from it and many + * reiserfs helpers called in the mount path and elsewhere require + * this lock to be held even if it's not always necessary. Let's be + * conservative and hold it early. The window can be reduced after + * careful review of the code. + */ + reiserfs_write_lock(s); + // set_device_ro(s->s_dev, 1) ; if (journal_init(s, jdev_name, old_format, commit_max_age)) { SWARN(silent, s, "sh-2022", @@ -1995,12 +1996,16 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) return (0); error: - if (jinit_done) { /* kill the commit thread, free journal ram */ + reiserfs_write_unlock(s); + +error_unlocked: + /* kill the commit thread, free journal ram */ + if (jinit_done) { + reiserfs_write_lock(s); journal_release_error(NULL, s); + reiserfs_write_unlock(s); } - reiserfs_write_unlock(s); - reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) brelse(SB_BUFFER_WITH_SB(s)); -- cgit v1.2.3 From 37c69b98d0dca54d9eb72226bbf2e211aaaf126e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 10 Jan 2012 15:11:09 -0800 Subject: reiserfs: don't lock journal_init() journal_init() doesn't need the lock since no operation on the filesystem is involved there. journal_read() and get_list_bitmap() have yet to be reviewed carefully though before removing the lock there. Just keep the it around these two calls for safety. 
Signed-off-by: Frederic Weisbecker Cc: Al Viro Cc: Christoph Hellwig Cc: Jeff Mahoney Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/journal.c | 53 +++++++++++++++++++-------------------------------- fs/reiserfs/super.c | 21 ++++++++++---------- 2 files changed, 31 insertions(+), 43 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index cce8e8710df..c3cf54fd4de 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2678,16 +2678,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name, char b[BDEVNAME_SIZE]; int ret; - /* - * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS - * dependency inversion warnings. - */ - reiserfs_write_unlock(sb); journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); if (!journal) { reiserfs_warning(sb, "journal-1256", "unable to get memory for journal structure"); - reiserfs_write_lock(sb); return 1; } INIT_LIST_HEAD(&journal->j_bitmap_nodes); @@ -2695,10 +2689,8 @@ int journal_init(struct super_block *sb, const char *j_dev_name, INIT_LIST_HEAD(&journal->j_working_list); INIT_LIST_HEAD(&journal->j_journal_list); journal->j_persistent_trans = 0; - ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, - reiserfs_bmap_count(sb)); - reiserfs_write_lock(sb); - if (ret) + if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, + reiserfs_bmap_count(sb))) goto free_and_return; allocate_bitmap_nodes(sb); @@ -2727,27 +2719,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name, goto free_and_return; } - /* - * We need to unlock here to avoid creating the following - * dependency: - * reiserfs_lock -> sysfs_mutex - * Because the reiserfs mmap path creates the following dependency: - * mm->mmap -> reiserfs_lock, hence we have - * mm->mmap -> reiserfs_lock ->sysfs_mutex - * This would ends up in a circular dependency with sysfs readdir path - * which does sysfs_mutex -> mm->mmap_sem - * This is fine because the reiserfs lock is useless in mount path, - * at least until we call journal_begin. We keep it for paranoid - * reasons. - */ - reiserfs_write_unlock(sb); if (journal_init_dev(sb, journal, j_dev_name) != 0) { - reiserfs_write_lock(sb); reiserfs_warning(sb, "sh-462", "unable to initialize jornal device"); goto free_and_return; } - reiserfs_write_lock(sb); rs = SB_DISK_SUPER_BLOCK(sb); @@ -2829,9 +2805,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name, journal->j_mount_id = 10; journal->j_state = 0; atomic_set(&(journal->j_jlock), 0); - reiserfs_write_unlock(sb); journal->j_cnode_free_list = allocate_cnodes(num_cnodes); - reiserfs_write_lock(sb); journal->j_cnode_free_orig = journal->j_cnode_free_list; journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; journal->j_cnode_used = 0; @@ -2848,24 +2822,37 @@ int journal_init(struct super_block *sb, const char *j_dev_name, init_journal_hash(sb); jl = journal->j_current_jl; + + /* + * get_list_bitmap() may call flush_commit_list() which + * requires the lock. Calling flush_commit_list() shouldn't happen + * this early but I like to be paranoid. + */ + reiserfs_write_lock(sb); jl->j_list_bitmap = get_list_bitmap(sb, jl); + reiserfs_write_unlock(sb); if (!jl->j_list_bitmap) { reiserfs_warning(sb, "journal-2005", "get_list_bitmap failed for journal list 0"); goto free_and_return; } - if (journal_read(sb) < 0) { + + /* + * Journal_read needs to be inspected in order to push down + * the lock further inside (or even remove it). 
+ */ + reiserfs_write_lock(sb); + ret = journal_read(sb); + reiserfs_write_unlock(sb); + if (ret < 0) { reiserfs_warning(sb, "reiserfs-2006", "Replay Failure, unable to mount"); goto free_and_return; } reiserfs_mounted_fs_count++; - if (reiserfs_mounted_fs_count <= 1) { - reiserfs_write_unlock(sb); + if (reiserfs_mounted_fs_count <= 1) commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0); - reiserfs_write_lock(sb); - } INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); journal->j_work_sb = sb; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 620dd5d9e1b..61b60380d2c 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1826,6 +1826,17 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) printk("reiserfs: using flush barriers\n"); } + // set_device_ro(s->s_dev, 1) ; + if (journal_init(s, jdev_name, old_format, commit_max_age)) { + SWARN(silent, s, "sh-2022", + "unable to initialize journal space"); + goto error_unlocked; + } else { + jinit_done = 1; /* once this is set, journal_release must be called + ** if we error out of the mount + */ + } + /* * This path assumed to be called with the BKL in the old times. * Now we have inherited the big reiserfs lock from it and many @@ -1836,16 +1847,6 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) */ reiserfs_write_lock(s); - // set_device_ro(s->s_dev, 1) ; - if (journal_init(s, jdev_name, old_format, commit_max_age)) { - SWARN(silent, s, "sh-2022", - "unable to initialize journal space"); - goto error; - } else { - jinit_done = 1; /* once this is set, journal_release must be called - ** if we error out of the mount - */ - } if (reread_meta_blocks(s)) { SWARN(silent, s, "jmacd-9", "unable to reread meta blocks after journal init"); -- cgit v1.2.3 From 9b467e6ebebbe75288aeb7e816ffbb5d35d6eaa3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 10 Jan 2012 15:11:11 -0800 Subject: reiserfs: don't lock root inode searching Nothing requires that we lock the filesystem until the root inode is provided. Also iget5_locked() triggers a warning because we are holding the filesystem lock while allocating the inode, which result in a lockdep suspicion that we have a lock inversion against the reclaim path: [ 1986.896979] ================================= [ 1986.896990] [ INFO: inconsistent lock state ] [ 1986.896997] 3.1.1-main #8 [ 1986.897001] --------------------------------- [ 1986.897007] inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. 
[ 1986.897016] kswapd0/16 [HC0[0]:SC0[0]:HE1:SE1] takes: [ 1986.897023] (&REISERFS_SB(s)->lock){+.+.?.}, at: [] reiserfs_write_lock+0x20/0x2a [ 1986.897044] {RECLAIM_FS-ON-W} state was registered at: [ 1986.897050] [] mark_held_locks+0xae/0xd0 [ 1986.897060] [] lockdep_trace_alloc+0x7d/0x91 [ 1986.897068] [] kmem_cache_alloc+0x1a/0x93 [ 1986.897078] [] reiserfs_alloc_inode+0x13/0x3d [ 1986.897088] [] alloc_inode+0x14/0x5f [ 1986.897097] [] iget5_locked+0x62/0x13a [ 1986.897106] [] reiserfs_fill_super+0x410/0x8b9 [ 1986.897114] [] mount_bdev+0x10b/0x159 [ 1986.897123] [] get_super_block+0x10/0x12 [ 1986.897131] [] mount_fs+0x59/0x12d [ 1986.897138] [] vfs_kern_mount+0x45/0x7a [ 1986.897147] [] do_kern_mount+0x2f/0xb0 [ 1986.897155] [] do_mount+0x5c2/0x612 [ 1986.897163] [] sys_mount+0x61/0x8f [ 1986.897170] [] sysenter_do_call+0x12/0x32 [ 1986.897181] irq event stamp: 7509691 [ 1986.897186] hardirqs last enabled at (7509691): [] kmem_cache_alloc+0x6e/0x93 [ 1986.897197] hardirqs last disabled at (7509690): [] kmem_cache_alloc+0x24/0x93 [ 1986.897209] softirqs last enabled at (7508896): [] __do_softirq+0xee/0xfd [ 1986.897222] softirqs last disabled at (7508859): [] do_softirq+0x50/0x9d [ 1986.897234] [ 1986.897235] other info that might help us debug this: [ 1986.897242] Possible unsafe locking scenario: [ 1986.897244] [ 1986.897250] CPU0 [ 1986.897254] ---- [ 1986.897257] lock(&REISERFS_SB(s)->lock); [ 1986.897265] [ 1986.897269] lock(&REISERFS_SB(s)->lock); [ 1986.897276] [ 1986.897277] *** DEADLOCK *** [ 1986.897278] [ 1986.897286] no locks held by kswapd0/16. [ 1986.897291] [ 1986.897292] stack backtrace: [ 1986.897299] Pid: 16, comm: kswapd0 Not tainted 3.1.1-main #8 [ 1986.897306] Call Trace: [ 1986.897314] [] ? printk+0xf/0x11 [ 1986.897324] [] print_usage_bug+0x20e/0x21a [ 1986.897332] [] ? print_irq_inversion_bug+0x172/0x172 [ 1986.897341] [] mark_lock+0x27f/0x483 [ 1986.897349] [] __lock_acquire+0x628/0x1472 [ 1986.897358] [] lock_acquire+0x47/0x5e [ 1986.897366] [] ? reiserfs_write_lock+0x20/0x2a [ 1986.897384] [] ? reiserfs_write_lock+0x20/0x2a [ 1986.897397] [] mutex_lock_nested+0x35/0x26f [ 1986.897409] [] ? reiserfs_write_lock+0x20/0x2a [ 1986.897421] [] reiserfs_write_lock+0x20/0x2a [ 1986.897433] [] map_block_for_writepage+0xc9/0x590 [ 1986.897448] [] ? create_empty_buffers+0x33/0x8f [ 1986.897461] [] ? get_parent_ip+0xb/0x31 [ 1986.897472] [] ? sub_preempt_count+0x81/0x8e [ 1986.897485] [] ? _raw_spin_unlock+0x27/0x3d [ 1986.897496] [] ? get_parent_ip+0xb/0x31 [ 1986.897508] [] reiserfs_writepage+0x1b9/0x3e7 [ 1986.897521] [] ? clear_page_dirty_for_io+0xcb/0xde [ 1986.897533] [] ? trace_hardirqs_on_caller+0x108/0x138 [ 1986.897546] [] ? trace_hardirqs_on+0xb/0xd [ 1986.897559] [] shrink_page_list+0x34f/0x5e2 [ 1986.897572] [] shrink_inactive_list+0x172/0x22c [ 1986.897585] [] shrink_zone+0x303/0x3b1 [ 1986.897597] [] ? _raw_spin_unlock+0x27/0x3d [ 1986.897611] [] kswapd+0x3b7/0x5f2 The deadlock shouldn't happen since we are doing that allocation in the mount path, the filesystem is not available for any reclaim. Still the warning is annoying. To solve this, acquire the lock later only where we need it, right before calling reiserfs_read_locked_inode() that wants to lock to walk the tree. 
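The dependency behind the warning can be sketched with a toy userspace model using invented names (this is not reiserfs code): an allocation made while holding a lock may, under memory pressure, recurse into a reclaim path that wants the same lock, so the lock is taken only after the allocation.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins: fs_lock ~ the reiserfs write lock, reclaim() ~ the
 * writeback path that also needs it, alloc_inode() ~ the allocation in
 * the inode lookup that may dive into reclaim under memory pressure. */
static pthread_mutex_t fs_lock = PTHREAD_MUTEX_INITIALIZER;

static void reclaim(void)
{
	pthread_mutex_lock(&fs_lock);	/* reclaim wants the fs lock too */
	/* ... write back and free cached objects ... */
	pthread_mutex_unlock(&fs_lock);
}

static void *alloc_inode(void)
{
	void *p = malloc(64);

	if (!p)
		reclaim();	/* allocation can recurse into reclaim */
	return p;
}

int main(void)
{
	/*
	 * Problematic order (what lockdep flagged): take fs_lock, then
	 * allocate -- reclaim would need fs_lock a second time.
	 *
	 * Fixed order: allocate first, then lock only for the work that
	 * really needs it (the tree walk).
	 */
	void *inode = alloc_inode();

	pthread_mutex_lock(&fs_lock);
	/* ... read the inode from the tree ... */
	pthread_mutex_unlock(&fs_lock);

	free(inode);
	return 0;
}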
Reported-by: Knut Petersen Signed-off-by: Frederic Weisbecker Cc: Al Viro Cc: Christoph Hellwig Cc: Jeff Mahoney Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/super.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 61b60380d2c..e12d8b97cd4 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1519,9 +1519,7 @@ static int read_super_block(struct super_block *s, int offset) static int reread_meta_blocks(struct super_block *s) { ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); - reiserfs_write_unlock(s); wait_on_buffer(SB_BUFFER_WITH_SB(s)); - reiserfs_write_lock(s); if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { reiserfs_warning(s, "reiserfs-2504", "error reading the super"); return 1; @@ -1837,24 +1835,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) */ } - /* - * This path assumed to be called with the BKL in the old times. - * Now we have inherited the big reiserfs lock from it and many - * reiserfs helpers called in the mount path and elsewhere require - * this lock to be held even if it's not always necessary. Let's be - * conservative and hold it early. The window can be reduced after - * careful review of the code. - */ - reiserfs_write_lock(s); - if (reread_meta_blocks(s)) { SWARN(silent, s, "jmacd-9", "unable to reread meta blocks after journal init"); - goto error; + goto error_unlocked; } if (replay_only(s)) - goto error; + goto error_unlocked; if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { SWARN(silent, s, "clm-7000", @@ -1868,9 +1856,19 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) reiserfs_init_locked_inode, (void *)(&args)); if (!root_inode) { SWARN(silent, s, "jmacd-10", "get root inode failed"); - goto error; + goto error_unlocked; } + /* + * This path assumed to be called with the BKL in the old times. + * Now we have inherited the big reiserfs lock from it and many + * reiserfs helpers called in the mount path and elsewhere require + * this lock to be held even if it's not always necessary. Let's be + * conservative and hold it early. The window can be reduced after + * careful review of the code. + */ + reiserfs_write_lock(s); + if (root_inode->i_state & I_NEW) { reiserfs_read_locked_inode(root_inode, &args); unlock_new_inode(root_inode); -- cgit v1.2.3 From 7773fbc54182a90cd248656619c7d33859e5f91d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 10 Jan 2012 15:11:20 -0800 Subject: procfs: make proc_get_link to use dentry instead of inode Prepare the ground for the next "map_files" patch which needs a name of a link file to analyse. Signed-off-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Tejun Heo Cc: Vasiliy Kulikov Cc: "Kirill A. 
Shutemov" Cc: Alexey Dobriyan Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 20 ++++++++++---------- include/linux/proc_fs.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 1aab5fe05a1..e31d95055c6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -166,9 +166,9 @@ static int get_task_root(struct task_struct *task, struct path *root) return result; } -static int proc_cwd_link(struct inode *inode, struct path *path) +static int proc_cwd_link(struct dentry *dentry, struct path *path) { - struct task_struct *task = get_proc_task(inode); + struct task_struct *task = get_proc_task(dentry->d_inode); int result = -ENOENT; if (task) { @@ -183,9 +183,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path) return result; } -static int proc_root_link(struct inode *inode, struct path *path) +static int proc_root_link(struct dentry *dentry, struct path *path) { - struct task_struct *task = get_proc_task(inode); + struct task_struct *task = get_proc_task(dentry->d_inode); int result = -ENOENT; if (task) { @@ -1456,13 +1456,13 @@ static const struct file_operations proc_pid_set_comm_operations = { .release = single_release, }; -static int proc_exe_link(struct inode *inode, struct path *exe_path) +static int proc_exe_link(struct dentry *dentry, struct path *exe_path) { struct task_struct *task; struct mm_struct *mm; struct file *exe_file; - task = get_proc_task(inode); + task = get_proc_task(dentry->d_inode); if (!task) return -ENOENT; mm = get_task_mm(task); @@ -1492,7 +1492,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); + error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path); out: return ERR_PTR(error); } @@ -1531,7 +1531,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &path); + error = PROC_I(inode)->op.proc_get_link(dentry, &path); if (error) goto out; @@ -1823,9 +1823,9 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) return -ENOENT; } -static int proc_fd_link(struct inode *inode, struct path *path) +static int proc_fd_link(struct dentry *dentry, struct path *path) { - return proc_fd_info(inode, path, NULL); + return proc_fd_info(dentry->d_inode, path, NULL); } static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 6d9e575519c..85c50730623 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -253,7 +253,7 @@ extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; union proc_op { - int (*proc_get_link)(struct inode *, struct path *); + int (*proc_get_link)(struct dentry *, struct path *); int (*proc_read)(struct task_struct *task, char *page); int (*proc_show)(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, -- cgit v1.2.3 From 640708a2cff7f81e246243b0073c66e6ece7e53e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 10 Jan 2012 15:11:23 -0800 Subject: procfs: introduce the /proc//map_files/ directory This one behaves similarly to the /proc//fd/ one - it contains symlinks one for each mapping with file, the name of a symlink is "vma->vm_start-vma->vm_end", the target is the 
file. Opening a symlink results in a file that point exactly to the same inode as them vma's one. For example the ls -l of some arbitrary /proc//map_files/ | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80403000-7f8f80404000 -> /lib64/libc-2.5.so | lr-x------ 1 root root 64 Aug 26 06:40 7f8f8061e000-7f8f80620000 -> /lib64/libselinux.so.1 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80826000-7f8f80827000 -> /lib64/libacl.so.1.1.0 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80a2f000-7f8f80a30000 -> /lib64/librt-2.5.so | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80a30000-7f8f80a4c000 -> /lib64/ld-2.5.so This *helps* checkpointing process in three ways: 1. When dumping a task mappings we do know exact file that is mapped by particular region. We do this by opening /proc/$pid/map_files/$address symlink the way we do with file descriptors. 2. This also helps in determining which anonymous shared mappings are shared with each other by comparing the inodes of them. 3. When restoring a set of processes in case two of them has a mapping shared, we map the memory by the 1st one and then open its /proc/$pid/map_files/$address file and map it by the 2nd task. Using /proc/$pid/maps for this is quite inconvenient since it brings repeatable re-reading and reparsing for this text file which slows down restore procedure significantly. Also as being pointed in (3) it is a way easier to use top level shared mapping in children as /proc/$pid/map_files/$address when needed. [akpm@linux-foundation.org: coding-style fixes] [gorcunov@openvz.org: make map_files depend on CHECKPOINT_RESTORE] Signed-off-by: Pavel Emelyanov Signed-off-by: Cyrill Gorcunov Reviewed-by: Vasiliy Kulikov Reviewed-by: "Kirill A. Shutemov" Cc: Tejun Heo Cc: Alexey Dobriyan Cc: Al Viro Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 355 +++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 12 ++ 2 files changed, 367 insertions(+) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index e31d95055c6..4d755fed3ec 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -83,6 +83,7 @@ #include #include #include +#include #ifdef CONFIG_HARDWALL #include #endif @@ -134,6 +135,8 @@ struct pid_entry { NULL, &proc_single_file_operations, \ { .proc_show = show } ) +static int proc_fd_permission(struct inode *inode, int mask); + /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. @@ -2046,6 +2049,355 @@ static const struct file_operations proc_fd_operations = { .llseek = default_llseek, }; +#ifdef CONFIG_CHECKPOINT_RESTORE + +/* + * dname_to_vma_addr - maps a dentry name into two unsigned longs + * which represent vma start and end addresses. 
+ */ +static int dname_to_vma_addr(struct dentry *dentry, + unsigned long *start, unsigned long *end) +{ + if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) + return -EINVAL; + + return 0; +} + +static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + unsigned long vm_start, vm_end; + bool exact_vma_exists = false; + struct mm_struct *mm = NULL; + struct task_struct *task; + const struct cred *cred; + struct inode *inode; + int status = 0; + + if (nd && nd->flags & LOOKUP_RCU) + return -ECHILD; + + if (!capable(CAP_SYS_ADMIN)) { + status = -EACCES; + goto out_notask; + } + + inode = dentry->d_inode; + task = get_proc_task(inode); + if (!task) + goto out_notask; + + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out; + + mm = get_task_mm(task); + if (!mm) + goto out; + + if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { + down_read(&mm->mmap_sem); + exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); + up_read(&mm->mmap_sem); + } + + mmput(mm); + + if (exact_vma_exists) { + if (task_dumpable(task)) { + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); + } else { + inode->i_uid = 0; + inode->i_gid = 0; + } + security_task_to_inode(task, inode); + status = 1; + } + +out: + put_task_struct(task); + +out_notask: + if (status <= 0) + d_drop(dentry); + + return status; +} + +static const struct dentry_operations tid_map_files_dentry_operations = { + .d_revalidate = map_files_d_revalidate, + .d_delete = pid_delete_dentry, +}; + +static int proc_map_files_get_link(struct dentry *dentry, struct path *path) +{ + unsigned long vm_start, vm_end; + struct vm_area_struct *vma; + struct task_struct *task; + struct mm_struct *mm; + int rc; + + rc = -ENOENT; + task = get_proc_task(dentry->d_inode); + if (!task) + goto out; + + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + goto out; + + rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); + if (rc) + goto out_mmput; + + down_read(&mm->mmap_sem); + vma = find_exact_vma(mm, vm_start, vm_end); + if (vma && vma->vm_file) { + *path = vma->vm_file->f_path; + path_get(path); + rc = 0; + } + up_read(&mm->mmap_sem); + +out_mmput: + mmput(mm); +out: + return rc; +} + +struct map_files_info { + struct file *file; + unsigned long len; + unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ +}; + +static struct dentry * +proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *task, const void *ptr) +{ + const struct file *file = ptr; + struct proc_inode *ei; + struct inode *inode; + + if (!file) + return ERR_PTR(-ENOENT); + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + return ERR_PTR(-ENOENT); + + ei = PROC_I(inode); + ei->op.proc_get_link = proc_map_files_get_link; + + inode->i_op = &proc_pid_link_inode_operations; + inode->i_size = 64; + inode->i_mode = S_IFLNK; + + if (file->f_mode & FMODE_READ) + inode->i_mode |= S_IRUSR; + if (file->f_mode & FMODE_WRITE) + inode->i_mode |= S_IWUSR; + + d_set_d_op(dentry, &tid_map_files_dentry_operations); + d_add(dentry, inode); + + return NULL; +} + +static struct dentry *proc_map_files_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + unsigned long vm_start, vm_end; + struct vm_area_struct *vma; + struct task_struct *task; + struct dentry *result; + struct mm_struct *mm; + + result = ERR_PTR(-EACCES); + if (!capable(CAP_SYS_ADMIN)) + goto out; + + result = ERR_PTR(-ENOENT); + task = get_proc_task(dir); 
+ if (!task) + goto out; + + result = ERR_PTR(-EACCES); + if (lock_trace(task)) + goto out_put_task; + + result = ERR_PTR(-ENOENT); + if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) + goto out_unlock; + + mm = get_task_mm(task); + if (!mm) + goto out_unlock; + + down_read(&mm->mmap_sem); + vma = find_exact_vma(mm, vm_start, vm_end); + if (!vma) + goto out_no_vma; + + result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); + +out_no_vma: + up_read(&mm->mmap_sem); + mmput(mm); +out_unlock: + unlock_trace(task); +out_put_task: + put_task_struct(task); +out: + return result; +} + +static const struct inode_operations proc_map_files_inode_operations = { + .lookup = proc_map_files_lookup, + .permission = proc_fd_permission, + .setattr = proc_setattr, +}; + +static int +proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + struct vm_area_struct *vma; + struct task_struct *task; + struct mm_struct *mm; + ino_t ino; + int ret; + + ret = -EACCES; + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = -ENOENT; + task = get_proc_task(inode); + if (!task) + goto out; + + ret = -EACCES; + if (lock_trace(task)) + goto out_put_task; + + ret = 0; + switch (filp->f_pos) { + case 0: + ino = inode->i_ino; + if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) + goto out_unlock; + filp->f_pos++; + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) + goto out_unlock; + filp->f_pos++; + default: + { + unsigned long nr_files, pos, i; + struct flex_array *fa = NULL; + struct map_files_info info; + struct map_files_info *p; + + mm = get_task_mm(task); + if (!mm) + goto out_unlock; + down_read(&mm->mmap_sem); + + nr_files = 0; + + /* + * We need two passes here: + * + * 1) Collect vmas of mapped files with mmap_sem taken + * 2) Release mmap_sem and instantiate entries + * + * otherwise we get lockdep complained, since filldir() + * routine might require mmap_sem taken in might_fault(). + */ + + for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { + if (vma->vm_file && ++pos > filp->f_pos) + nr_files++; + } + + if (nr_files) { + fa = flex_array_alloc(sizeof(info), nr_files, + GFP_KERNEL); + if (!fa || flex_array_prealloc(fa, 0, nr_files, + GFP_KERNEL)) { + ret = -ENOMEM; + if (fa) + flex_array_free(fa); + up_read(&mm->mmap_sem); + mmput(mm); + goto out_unlock; + } + for (i = 0, vma = mm->mmap, pos = 2; vma; + vma = vma->vm_next) { + if (!vma->vm_file) + continue; + if (++pos <= filp->f_pos) + continue; + + get_file(vma->vm_file); + info.file = vma->vm_file; + info.len = snprintf(info.name, + sizeof(info.name), "%lx-%lx", + vma->vm_start, vma->vm_end); + if (flex_array_put(fa, i++, &info, GFP_KERNEL)) + BUG(); + } + } + up_read(&mm->mmap_sem); + + for (i = 0; i < nr_files; i++) { + p = flex_array_get(fa, i); + ret = proc_fill_cache(filp, dirent, filldir, + p->name, p->len, + proc_map_files_instantiate, + task, p->file); + if (ret) + break; + filp->f_pos++; + fput(p->file); + } + for (; i < nr_files; i++) { + /* + * In case of error don't forget + * to put rest of file refs. 
+ */ + p = flex_array_get(fa, i); + fput(p->file); + } + if (fa) + flex_array_free(fa); + mmput(mm); + } + } + +out_unlock: + unlock_trace(task); +out_put_task: + put_task_struct(task); +out: + return ret; +} + +static const struct file_operations proc_map_files_operations = { + .read = generic_read_dir, + .readdir = proc_map_files_readdir, + .llseek = default_llseek, +}; + +#endif /* CONFIG_CHECKPOINT_RESTORE */ + /* * /proc/pid/fd needs a special permission handler so that a process can still * access /proc/self/fd after it has executed a setuid(). @@ -2661,6 +3013,9 @@ static const struct inode_operations proc_task_inode_operations; static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), +#ifdef CONFIG_CHECKPOINT_RESTORE + DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), +#endif DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET diff --git a/include/linux/mm.h b/include/linux/mm.h index 5568553a41f..6eba2cc016c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1482,6 +1482,18 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } +/* Look up the first VMA which exactly match the interval vm_start ... vm_end */ +static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, + unsigned long vm_start, unsigned long vm_end) +{ + struct vm_area_struct *vma = find_vma(mm, vm_start); + + if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end)) + vma = NULL; + + return vma; +} + #ifdef CONFIG_MMU pgprot_t vm_get_page_prot(unsigned long vm_flags); #else -- cgit v1.2.3 From 97412950b10e64f347aec4a9b759395c2465adf6 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Tue, 10 Jan 2012 15:11:27 -0800 Subject: procfs: parse mount options Add support for procfs mount options. Actual mount options are coming in the next patches. Signed-off-by: Vasiliy Kulikov Cc: Alexey Dobriyan Cc: Al Viro Cc: Randy Dunlap Cc: "H. 
Peter Anvin" Cc: Greg KH Cc: Theodore Tso Cc: Alan Cox Cc: James Morris Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/inode.c | 10 ++++++++++ fs/proc/internal.h | 1 + fs/proc/root.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 64 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 51a176622b8..27c762f3487 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,9 @@ #include #include #include +#include #include +#include #include #include @@ -101,12 +104,19 @@ void __init proc_init_inodecache(void) init_once); } +static int proc_show_options(struct seq_file *seq, struct dentry *root) +{ + return 0; +} + static const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, .evict_inode = proc_evict_inode, .statfs = simple_statfs, + .remount_fs = proc_remount, + .show_options = proc_show_options, }; static void __pde_users_dec(struct proc_dir_entry *pde) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 7838e5cfec1..292577531ad 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -117,6 +117,7 @@ void pde_put(struct proc_dir_entry *pde); int proc_fill_super(struct super_block *); struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); +int proc_remount(struct super_block *sb, int *flags, char *data); /* * These are generic /proc routines that use the internal diff --git a/fs/proc/root.c b/fs/proc/root.c index 03102d97818..6a8ac1d361a 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "internal.h" @@ -36,6 +37,48 @@ static int proc_set_super(struct super_block *sb, void *data) return err; } +enum { + Opt_err, +}; + +static const match_table_t tokens = { + {Opt_err, NULL}, +}; + +static int proc_parse_options(char *options, struct pid_namespace *pid) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + + pr_debug("proc: options = %s\n", options); + + if (!options) + return 1; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + args[0].to = args[0].from = 0; + token = match_token(p, tokens, args); + switch (token) { + default: + pr_err("proc: unrecognized mount option \"%s\" " + "or missing value\n", p); + return 0; + } + } + + return 1; +} + +int proc_remount(struct super_block *sb, int *flags, char *data) +{ + struct pid_namespace *pid = sb->s_fs_info; + return !proc_parse_options(data, pid); +} + static struct dentry *proc_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -43,11 +86,15 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, struct super_block *sb; struct pid_namespace *ns; struct proc_inode *ei; + char *options; - if (flags & MS_KERNMOUNT) + if (flags & MS_KERNMOUNT) { ns = (struct pid_namespace *)data; - else + options = NULL; + } else { ns = current->nsproxy->pid_ns; + options = data; + } sb = sget(fs_type, proc_test_super, proc_set_super, ns); if (IS_ERR(sb)) @@ -55,6 +102,10 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, if (!sb->s_root) { sb->s_flags = flags; + if (!proc_parse_options(options, ns)) { + deactivate_locked_super(sb); + return ERR_PTR(-EINVAL); + } err = proc_fill_super(sb); if (err) { deactivate_locked_super(sb); -- cgit v1.2.3 From 
0499680a42141d86417a8fbaa8c8db806bea1201 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Tue, 10 Jan 2012 15:11:31 -0800 Subject: procfs: add hidepid= and gid= mount options Add support for mount options to restrict access to /proc/PID/ directories. The default backward-compatible "relaxed" behaviour is left untouched. The first mount option is called "hidepid" and its value defines how much info about processes we want to be available for non-owners: hidepid=0 (default) means the old behavior - anybody may read all world-readable /proc/PID/* files. hidepid=1 means users may not access any /proc// directories, but their own. Sensitive files like cmdline, sched*, status are now protected against other users. As permission checking done in proc_pid_permission() and files' permissions are left untouched, programs expecting specific files' modes are not confused. hidepid=2 means hidepid=1 plus all /proc/PID/ will be invisible to other users. It doesn't mean that it hides whether a process exists (it can be learned by other means, e.g. by kill -0 $PID), but it hides process' euid and egid. It compicates intruder's task of gathering info about running processes, whether some daemon runs with elevated privileges, whether another user runs some sensitive program, whether other users run any program at all, etc. gid=XXX defines a group that will be able to gather all processes' info (as in hidepid=0 mode). This group should be used instead of putting nonroot user in sudoers file or something. However, untrusted users (like daemons, etc.) which are not supposed to monitor the tasks in the whole system should not be added to the group. hidepid=1 or higher is designed to restrict access to procfs files, which might reveal some sensitive private information like precise keystrokes timings: http://www.openwall.com/lists/oss-security/2011/11/05/3 hidepid=1/2 doesn't break monitoring userspace tools. ps, top, pgrep, and conky gracefully handle EPERM/ENOENT and behave as if the current user is the only user running processes. pstree shows the process subtree which contains "pstree" process. Note: the patch doesn't deal with setuid/setgid issues of keeping preopened descriptors of procfs files (like https://lkml.org/lkml/2011/2/7/368). We rely on that the leaked information like the scheduling counters of setuid apps doesn't threaten anybody's privacy - only the user started the setuid program may read the counters. Signed-off-by: Vasiliy Kulikov Cc: Alexey Dobriyan Cc: Al Viro Cc: Randy Dunlap Cc: "H. Peter Anvin" Cc: Greg KH Cc: Theodore Tso Cc: Alan Cox Cc: James Morris Cc: Oleg Nesterov Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 39 +++++++++++++++++++++ fs/proc/base.c | 69 +++++++++++++++++++++++++++++++++++++- fs/proc/inode.c | 8 +++++ fs/proc/root.c | 21 ++++++++++-- include/linux/pid_namespace.h | 2 ++ 5 files changed, 135 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 0ec91f03422..12fee132fbe 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -41,6 +41,8 @@ Table of Contents 3.5 /proc//mountinfo - Information about mounts 3.6 /proc//comm & /proc//task//comm + 4 Configuring procfs + 4.1 Mount options ------------------------------------------------------------------------------ Preface @@ -1542,3 +1544,40 @@ a task to set its own or one of its thread siblings comm value. 
The comm value is limited in size compared to the cmdline value, so writing anything longer then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated comm value. + + +------------------------------------------------------------------------------ +Configuring procfs +------------------------------------------------------------------------------ + +4.1 Mount options +--------------------- + +The following mount options are supported: + + hidepid= Set /proc// access mode. + gid= Set the group authorized to learn processes information. + +hidepid=0 means classic mode - everybody may access all /proc// directories +(default). + +hidepid=1 means users may not access any /proc// directories but their +own. Sensitive files like cmdline, sched*, status are now protected against +other users. This makes it impossible to learn whether any user runs +specific program (given the program doesn't reveal itself by its behaviour). +As an additional bonus, as /proc//cmdline is unaccessible for other users, +poorly written programs passing sensitive information via program arguments are +now protected against local eavesdroppers. + +hidepid=2 means hidepid=1 plus all /proc// will be fully invisible to other +users. It doesn't mean that it hides a fact whether a process with a specific +pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"), +but it hides process' uid and gid, which may be learned by stat()'ing +/proc// otherwise. It greatly complicates an intruder's task of gathering +information about running processes, whether some daemon runs with elevated +privileges, whether other user runs some sensitive program, whether other users +run any program at all, etc. + +gid= defines a group authorized to learn processes information otherwise +prohibited by hidepid=. If you use some daemon like identd which needs to learn +information about processes information, just add identd to this group. diff --git a/fs/proc/base.c b/fs/proc/base.c index 4d755fed3ec..8173dfd89cb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -631,6 +631,50 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) return 0; } +/* + * May current process learn task's sched/cmdline info (for hide_pid_min=1) + * or euid/egid (for hide_pid_min=2)? + */ +static bool has_pid_permissions(struct pid_namespace *pid, + struct task_struct *task, + int hide_pid_min) +{ + if (pid->hide_pid < hide_pid_min) + return true; + if (in_group_p(pid->pid_gid)) + return true; + return ptrace_may_access(task, PTRACE_MODE_READ); +} + + +static int proc_pid_permission(struct inode *inode, int mask) +{ + struct pid_namespace *pid = inode->i_sb->s_fs_info; + struct task_struct *task; + bool has_perms; + + task = get_proc_task(inode); + has_perms = has_pid_permissions(pid, task, 1); + put_task_struct(task); + + if (!has_perms) { + if (pid->hide_pid == 2) { + /* + * Let's make getdents(), stat(), and open() + * consistent with each other. If a process + * may not stat() a file, it shouldn't be seen + * in procfs at all. 
+ */ + return -ENOENT; + } + + return -EPERM; + } + return generic_permission(inode, mask); +} + + + static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; @@ -1615,6 +1659,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) struct inode *inode = dentry->d_inode; struct task_struct *task; const struct cred *cred; + struct pid_namespace *pid = dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -1623,6 +1668,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) stat->gid = 0; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { + if (!has_pid_permissions(pid, task, 2)) { + rcu_read_unlock(); + /* + * This doesn't prevent learning whether PID exists, + * it only makes getattr() consistent with readdir(). + */ + return -ENOENT; + } if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { cred = __task_cred(task); @@ -3119,6 +3172,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = { .lookup = proc_tgid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, + .permission = proc_pid_permission, }; static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) @@ -3322,6 +3376,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi proc_pid_instantiate, iter.task, NULL); } +static int fake_filldir(void *buf, const char *name, int namelen, + loff_t offset, u64 ino, unsigned d_type) +{ + return 0; +} + /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { @@ -3329,6 +3389,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) struct task_struct *reaper; struct tgid_iter iter; struct pid_namespace *ns; + filldir_t __filldir; if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) goto out_no_task; @@ -3350,8 +3411,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { + if (has_pid_permissions(ns, iter.task, 2)) + __filldir = filldir; + else + __filldir = fake_filldir; + filp->f_pos = iter.tgid + TGID_OFFSET; - if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { + if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { put_task_struct(iter.task); goto out; } @@ -3686,6 +3752,7 @@ static const struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, .getattr = proc_task_getattr, .setattr = proc_setattr, + .permission = proc_pid_permission, }; static const struct file_operations proc_task_operations = { diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 27c762f3487..84fd3235a59 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -106,6 +106,14 @@ void __init proc_init_inodecache(void) static int proc_show_options(struct seq_file *seq, struct dentry *root) { + struct super_block *sb = root->d_sb; + struct pid_namespace *pid = sb->s_fs_info; + + if (pid->pid_gid) + seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid); + if (pid->hide_pid != 0) + seq_printf(seq, ",hidepid=%u", pid->hide_pid); + return 0; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 6a8ac1d361a..46a15d8a29c 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -38,10 +38,12 @@ static int proc_set_super(struct super_block *sb, void *data) } enum { - Opt_err, + Opt_gid, Opt_hidepid, Opt_err, }; static const match_table_t tokens = { + {Opt_hidepid, 
"hidepid=%u"}, + {Opt_gid, "gid=%u"}, {Opt_err, NULL}, }; @@ -49,8 +51,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) { char *p; substring_t args[MAX_OPT_ARGS]; - - pr_debug("proc: options = %s\n", options); + int option; if (!options) return 1; @@ -63,6 +64,20 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) args[0].to = args[0].from = 0; token = match_token(p, tokens, args); switch (token) { + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + pid->pid_gid = option; + break; + case Opt_hidepid: + if (match_int(&args[0], &option)) + return 0; + if (option < 0 || option > 2) { + pr_err("proc: hidepid value must be between 0 and 2.\n"); + return 0; + } + pid->hide_pid = option; + break; default: pr_err("proc: unrecognized mount option \"%s\" " "or missing value\n", p); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 38d10326246..e7cf6669ac3 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -30,6 +30,8 @@ struct pid_namespace { #ifdef CONFIG_BSD_PROCESS_ACCT struct bsd_acct_struct *bacct; #endif + gid_t pid_gid; + int hide_pid; }; extern struct pid_namespace init_pid_ns; -- cgit v1.2.3 From db804f23a72bada58f083dfad6a65d019ddb3bd4 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 10 Jan 2012 16:41:01 +0800 Subject: Btrfs: add pinned extents to on-disk free space cache correctly I got this while running xfstests: [24256.836098] block group 317849600 has an wrong amount of free space [24256.836100] btrfs: failed to load free space cache for block group 317849600 We should clamp the extent returned by find_first_extent_bit(), so the start of the extent won't smaller than the start of the block group. Signed-off-by: Li Zefan --- fs/btrfs/free-space-cache.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index ec23d43d0c3..01840ef95a3 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -838,7 +838,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct io_ctl io_ctl; struct list_head bitmap_list; struct btrfs_key key; - u64 start, end, len; + u64 start, extent_start, extent_end, len; int entries = 0; int bitmaps = 0; int ret; @@ -857,25 +857,12 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_cluster, block_group_list); - /* - * We shouldn't have switched the pinned extents yet so this is the - * right one - */ - unpin = root->fs_info->pinned_extents; - /* Lock all pages first so we can lock the extent safely. */ io_ctl_prepare_pages(&io_ctl, inode, 0); lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 0, &cached_state, GFP_NOFS); - /* - * When searching for pinned extents, we need to start at our start - * offset. 
- */ - if (block_group) - start = block_group->key.objectid; - node = rb_first(&ctl->free_space_offset); if (!node && cluster) { node = rb_first(&cluster->root); @@ -918,9 +905,20 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, * We want to add any pinned extents to our free space cache * so we don't leak the space */ + + /* + * We shouldn't have switched the pinned extents yet so this is the + * right one + */ + unpin = root->fs_info->pinned_extents; + + if (block_group) + start = block_group->key.objectid; + while (block_group && (start < block_group->key.objectid + block_group->key.offset)) { - ret = find_first_extent_bit(unpin, start, &start, &end, + ret = find_first_extent_bit(unpin, start, + &extent_start, &extent_end, EXTENT_DIRTY); if (ret) { ret = 0; @@ -928,20 +926,21 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, } /* This pinned extent is out of our range */ - if (start >= block_group->key.objectid + + if (extent_start >= block_group->key.objectid + block_group->key.offset) break; - len = block_group->key.objectid + - block_group->key.offset - start; - len = min(len, end + 1 - start); + extent_start = max(extent_start, start); + extent_end = min(block_group->key.objectid + + block_group->key.offset, extent_end + 1); + len = extent_end - extent_start; entries++; - ret = io_ctl_add_entry(&io_ctl, start, len, NULL); + ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL); if (ret) goto out_nospc; - start = end + 1; + start = extent_end; } /* Write out the bitmaps */ -- cgit v1.2.3 From a1ee5a45818acc7f9c13e560827cf3e8735ac919 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 9 Jan 2012 14:27:42 +0800 Subject: Btrfs: avoid possible NULL deref in io_ctl_drop_pages() If we run into some failure path in io_ctl_prepare_pages(), io_ctl->pages[] array may have some NULL pointers. Signed-off-by: Li Zefan --- fs/btrfs/free-space-cache.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 01840ef95a3..4e55af333e1 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -319,9 +319,11 @@ static void io_ctl_drop_pages(struct io_ctl *io_ctl) io_ctl_unmap_page(io_ctl); for (i = 0; i < io_ctl->num_pages; i++) { - ClearPageChecked(io_ctl->pages[i]); - unlock_page(io_ctl->pages[i]); - page_cache_release(io_ctl->pages[i]); + if (io_ctl->pages[i]) { + ClearPageChecked(io_ctl->pages[i]); + unlock_page(io_ctl->pages[i]); + page_cache_release(io_ctl->pages[i]); + } } } -- cgit v1.2.3 From 706efc6630c2722602541a6a2fc5900a4e38456a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 9 Jan 2012 14:36:28 +0800 Subject: Btrfs: check the return value of io_ctl_init() It can return -ENOMEM. 
Signed-off-by: Li Zefan --- fs/btrfs/free-space-cache.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 4e55af333e1..e4eb222147c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -637,7 +637,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, if (!num_entries) return 0; - io_ctl_init(&io_ctl, inode, root); + ret = io_ctl_init(&io_ctl, inode, root); + if (ret) + return ret; + ret = readahead_cache(inode); if (ret) goto out; @@ -851,7 +854,9 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, if (!i_size_read(inode)) return -1; - io_ctl_init(&io_ctl, inode, root); + ret = io_ctl_init(&io_ctl, inode, root); + if (ret) + return -1; /* Get the cluster for this block_group if it exists */ if (block_group && !list_empty(&block_group->cluster_list)) -- cgit v1.2.3 From f062abf089ff705e09bbaa6fa1e2fd7688a0f2ea Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 29 Dec 2011 13:36:45 +0800 Subject: Btrfs: remove BUG_ON()s in btrfs_ioctl_setflags() We can recover from errors and return -errno to user space. Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c04f02c7d5b..9619fb03a5d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -176,6 +176,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) struct btrfs_trans_handle *trans; unsigned int flags, oldflags; int ret; + u64 ip_oldflags; + unsigned int i_oldflags; if (btrfs_root_readonly(root)) return -EROFS; @@ -192,6 +194,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) mutex_lock(&inode->i_mutex); + ip_oldflags = ip->flags; + i_oldflags = inode->i_flags; + flags = btrfs_mask_flags(inode->i_mode, flags); oldflags = btrfs_flags_to_ioctl(ip->flags); if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { @@ -250,18 +255,23 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) } trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out_drop; + } btrfs_update_iflags(inode); inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, inode); - BUG_ON(ret); btrfs_end_transaction(trans, root); + out_drop: + if (ret) { + ip->flags = ip_oldflags; + inode->i_flags = i_oldflags; + } mnt_drop_write(file->f_path.mnt); - - ret = 0; out_unlock: mutex_unlock(&inode->i_mutex); return ret; -- cgit v1.2.3 From 4da6f1a332f6c16b6594c7892f13c31459b9b1c8 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 29 Dec 2011 13:39:50 +0800 Subject: Btrfs: reserve metadata space in btrfs_ioctl_setflags() Check and reserve space for btrfs_update_inode(). 
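The reason the join becomes a start is that btrfs_join_transaction() attaches to an already-running transaction without reserving any metadata space, while btrfs_start_transaction(root, 1) checks for and reserves room for the single tree item that btrfs_update_inode() will modify. A condensed sketch of the resulting sequence in btrfs_ioctl_setflags(), assembled from this hunk and the previous patch (the surrounding ioctl code and the out_drop/out_unlock cleanup are omitted here):

	trans = btrfs_start_transaction(root, 1);	/* reserve space for one item */
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_drop;			/* restore the saved flag values */
	}

	btrfs_update_iflags(inode);
	inode->i_ctime = CURRENT_TIME;
	ret = btrfs_update_inode(trans, root, inode);

	btrfs_end_transaction(trans, root);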
Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9619fb03a5d..fe8a60c865e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -254,7 +254,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); } - trans = btrfs_join_transaction(root); + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_drop; -- cgit v1.2.3 From 125ccb0ae6806dbec31abf4a85448971df3b4e39 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 8 Dec 2011 15:07:24 +0800 Subject: Btrfs: don't pass a trans handle unnecessarily in volumes.c Some functions never use the transaction handle passed to them. Signed-off-by: Li Zefan --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/volumes.c | 18 +++++++----------- fs/btrfs/volumes.h | 3 +-- 3 files changed, 9 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8603ee4e3df..5b53479ce07 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7084,7 +7084,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) * space to fit our block group in. */ if (device->total_bytes > device->bytes_used + min_free) { - ret = find_free_dev_extent(NULL, device, min_free, + ret = find_free_dev_extent(device, min_free, &dev_offset, NULL); if (!ret) dev_nr++; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f4b839fd3c9..73f673c8d8d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -829,7 +829,6 @@ out: /* * find_free_dev_extent - find free space in the specified device - * @trans: transaction handler * @device: the device which we search the free space in * @num_bytes: the size of the free space that we need * @start: store the start of the free space. @@ -848,8 +847,7 @@ out: * But if we don't find suitable free space, it is used to store the size of * the max free space. */ -int find_free_dev_extent(struct btrfs_trans_handle *trans, - struct btrfs_device *device, u64 num_bytes, +int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *len) { struct btrfs_key key; @@ -893,7 +891,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, key.offset = search_start; key.type = BTRFS_DEV_EXTENT_KEY; - ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; if (ret > 0) { @@ -1469,8 +1467,7 @@ error_undo: /* * does all the dirty work required for changing file system's UUID. 
*/ -static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static int btrfs_prepare_sprout(struct btrfs_root *root) { struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; struct btrfs_fs_devices *old_devices; @@ -1695,7 +1692,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (seeding_dev) { sb->s_flags &= ~MS_RDONLY; - ret = btrfs_prepare_sprout(trans, root); + ret = btrfs_prepare_sprout(root); BUG_ON(ret); } @@ -2323,8 +2320,7 @@ done: return ret; } -static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int btrfs_add_system_chunk(struct btrfs_root *root, struct btrfs_key *key, struct btrfs_chunk *chunk, int item_size) { @@ -2496,7 +2492,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (total_avail == 0) continue; - ret = find_free_dev_extent(trans, device, + ret = find_free_dev_extent(device, max_stripe_size * dev_stripes, &dev_offset, &max_avail); if (ret && ret != -ENOSPC) @@ -2687,7 +2683,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, BUG_ON(ret); if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { - ret = btrfs_add_system_chunk(trans, chunk_root, &key, chunk, + ret = btrfs_add_system_chunk(chunk_root, &key, chunk, item_size); BUG_ON(ret); } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 78f2d4d4f37..c1701ec9d49 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -230,7 +230,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_balance(struct btrfs_root *dev_root); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); -int find_free_dev_extent(struct btrfs_trans_handle *trans, - struct btrfs_device *device, u64 num_bytes, +int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *max_avail); #endif -- cgit v1.2.3 From de11cc12df17337979e0929d2831887432f236ca Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 1 Dec 2011 12:55:47 +0800 Subject: Btrfs: don't pre-allocate btrfs bio We pre-allocate a btrfs bio with fixed size, and then may re-allocate memory if we find stripes are bigger than the fixed size. But this pre-allocation is not necessary. Also we don't have to calcuate the stripe number twice. 
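The rework replaces a guess-allocate-retry cycle with one allocation made after num_stripes is known, the usual header-plus-flexible-array idiom that btrfs_bio_size(num_stripes) supports. A rough, self-contained illustration of that idiom (struct and helper names below are hypothetical stand-ins, not the real btrfs_bio layout):

	struct stripe_map {
		int num_stripes;
		atomic_t error;
		struct {
			u64 physical;
			u64 length;
		} stripes[];			/* sized at allocation time */
	};

	static struct stripe_map *stripe_map_alloc(int num_stripes)
	{
		struct stripe_map *sm;

		/* one allocation, exactly as large as needed */
		sm = kzalloc(sizeof(*sm) +
			     num_stripes * sizeof(sm->stripes[0]), GFP_NOFS);
		if (!sm)
			return NULL;
		sm->num_stripes = num_stripes;
		atomic_set(&sm->error, 0);
		return sm;
	}

Allocating once the count is known also removes the free_extent_map()/kfree()/goto again path the old code needed when its initial guess of 8 stripes turned out to be too small.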
Signed-off-by: Li Zefan --- fs/btrfs/volumes.c | 67 +++++++++++++++++------------------------------------- 1 file changed, 21 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 73f673c8d8d..540fdd25fb5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2897,26 +2897,13 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 stripe_nr; u64 stripe_nr_orig; u64 stripe_nr_end; - int stripes_allocated = 8; - int stripes_required = 1; int stripe_index; int i; + int ret = 0; int num_stripes; int max_errors = 0; struct btrfs_bio *bbio = NULL; - if (bbio_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) - stripes_allocated = 1; -again: - if (bbio_ret) { - bbio = kzalloc(btrfs_bio_size(stripes_allocated), - GFP_NOFS); - if (!bbio) - return -ENOMEM; - - atomic_set(&bbio->error, 0); - } - read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, *length); read_unlock(&em_tree->lock); @@ -2935,32 +2922,6 @@ again: if (mirror_num > map->num_stripes) mirror_num = 0; - /* if our btrfs_bio struct is too small, back off and try again */ - if (rw & REQ_WRITE) { - if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_DUP)) { - stripes_required = map->num_stripes; - max_errors = 1; - } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { - stripes_required = map->sub_stripes; - max_errors = 1; - } - } - if (rw & REQ_DISCARD) { - if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_DUP | - BTRFS_BLOCK_GROUP_RAID10)) { - stripes_required = map->num_stripes; - } - } - if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && - stripes_allocated < stripes_required) { - stripes_allocated = map->num_stripes; - free_extent_map(em); - kfree(bbio); - goto again; - } stripe_nr = offset; /* * stripe_nr counts the total number of stripes we have to stride @@ -3055,6 +3016,13 @@ again: } BUG_ON(stripe_index >= map->num_stripes); + bbio = kzalloc(btrfs_bio_size(num_stripes), GFP_NOFS); + if (!bbio) { + ret = -ENOMEM; + goto out; + } + atomic_set(&bbio->error, 0); + if (rw & REQ_DISCARD) { for (i = 0; i < num_stripes; i++) { bbio->stripes[i].physical = @@ -3151,15 +3119,22 @@ again: stripe_index++; } } - if (bbio_ret) { - *bbio_ret = bbio; - bbio->num_stripes = num_stripes; - bbio->max_errors = max_errors; - bbio->mirror_num = mirror_num; + + if (rw & REQ_WRITE) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_DUP)) { + max_errors = 1; + } } + + *bbio_ret = bbio; + bbio->num_stripes = num_stripes; + bbio->max_errors = max_errors; + bbio->mirror_num = mirror_num; out: free_extent_map(em); - return 0; + return ret; } int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, -- cgit v1.2.3 From ec9ef7a13be4dcce964c8503e8999087945e5b9e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 1 Dec 2011 14:06:42 +0800 Subject: Btrfs: simplfy calculation of stripe length for discard operation For btrfs raid, while discarding a range of space, we'll need to know the start offset and length to discard for each device, and it's done in btrfs_map_block(). However the calculation is a bit complex for raid0 and raid10, so I reimplement it based on a fact that: dev1 dev2 dev3 (raid0) ----------------------------------- s0 s3 s6 s1 s4 s7 s2 s5 Each device has (total_stripes / nr_dev) stripes, or plus one. 
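Put differently: a discard covering (stripe_nr_end - stripe_nr_orig) stripes spread over factor = num_stripes / sub_stripes device groups gives every group the quotient of that division, and the first "remainder" groups one stripe more. A tiny worked sketch of the arithmetic (the helper is hypothetical; the patch itself uses div_u64_rem() for the same split):

	/* How a run of stripes divides across 'factor' device groups. */
	static void split_stripes(u64 total_stripes, u32 factor,
				  u64 *stripes_per_dev, u32 *remaining)
	{
		*stripes_per_dev = total_stripes / factor;  /* full rounds for everyone */
		*remaining = total_stripes % factor;        /* first groups get one extra */
	}

	/*
	 * Example matching the figure above: 8 stripes on 3 raid0 devices
	 * (factor = 3) gives stripes_per_dev = 2 and remaining = 2, so two
	 * devices hold 3 stripes each (s0-s2 and s3-s5) and the last one
	 * holds 2 (s6-s7).
	 */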
Signed-off-by: Li Zefan --- fs/btrfs/volumes.c | 95 ++++++++++++++++++------------------------------------ 1 file changed, 31 insertions(+), 64 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 540fdd25fb5..563ef650850 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3024,80 +3024,47 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, atomic_set(&bbio->error, 0); if (rw & REQ_DISCARD) { + int factor = 0; + int sub_stripes = 0; + u64 stripes_per_dev = 0; + u32 remaining_stripes = 0; + + if (map->type & + (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { + if (map->type & BTRFS_BLOCK_GROUP_RAID0) + sub_stripes = 1; + else + sub_stripes = map->sub_stripes; + + factor = map->num_stripes / sub_stripes; + stripes_per_dev = div_u64_rem(stripe_nr_end - + stripe_nr_orig, + factor, + &remaining_stripes); + } + for (i = 0; i < num_stripes; i++) { bbio->stripes[i].physical = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; bbio->stripes[i].dev = map->stripes[stripe_index].dev; - if (map->type & BTRFS_BLOCK_GROUP_RAID0) { - u64 stripes; - u32 last_stripe = 0; - int j; - - div_u64_rem(stripe_nr_end - 1, - map->num_stripes, - &last_stripe); - - for (j = 0; j < map->num_stripes; j++) { - u32 test; - - div_u64_rem(stripe_nr_end - 1 - j, - map->num_stripes, &test); - if (test == stripe_index) - break; - } - stripes = stripe_nr_end - 1 - j; - do_div(stripes, map->num_stripes); - bbio->stripes[i].length = map->stripe_len * - (stripes - stripe_nr + 1); - - if (i == 0) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID10)) { + bbio->stripes[i].length = stripes_per_dev * + map->stripe_len; + if (i / sub_stripes < remaining_stripes) + bbio->stripes[i].length += + map->stripe_len; + if (i < sub_stripes) bbio->stripes[i].length -= stripe_offset; - stripe_offset = 0; - } - if (stripe_index == last_stripe) - bbio->stripes[i].length -= - stripe_end_offset; - } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { - u64 stripes; - int j; - int factor = map->num_stripes / - map->sub_stripes; - u32 last_stripe = 0; - - div_u64_rem(stripe_nr_end - 1, - factor, &last_stripe); - last_stripe *= map->sub_stripes; - - for (j = 0; j < factor; j++) { - u32 test; - - div_u64_rem(stripe_nr_end - 1 - j, - factor, &test); - - if (test == - stripe_index / map->sub_stripes) - break; - } - stripes = stripe_nr_end - 1 - j; - do_div(stripes, factor); - bbio->stripes[i].length = map->stripe_len * - (stripes - stripe_nr + 1); - - if (i < map->sub_stripes) { - bbio->stripes[i].length -= - stripe_offset; - if (i == map->sub_stripes - 1) - stripe_offset = 0; - } - if (stripe_index >= last_stripe && - stripe_index <= (last_stripe + - map->sub_stripes - 1)) { + if ((i / sub_stripes + 1) % + sub_stripes == remaining_stripes) bbio->stripes[i].length -= stripe_end_offset; - } + if (i == sub_stripes - 1) + stripe_offset = 0; } else bbio->stripes[i].length = *length; -- cgit v1.2.3 From 7fe1e641502616220437079258506196bc4d8cbf Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 29 Dec 2011 14:47:27 +0800 Subject: Btrfs: rewrite btrfs_trim_block_group() There are various bugs in block group trimming: - It may trim from offset smaller than user-specified offset. - It may trim beyond user-specified range. - It may leak free space for extents smaller than specified minlen. - It may truncate the last trimmed extent thus leak free space. - With mixed extents+bitmaps, some extents may not be trimmed. 
- With mixed extents+bitmaps, some bitmaps may not be trimmed (even none will be trimmed). Even for those trimmed, not all the free space in the bitmaps will be trimmed. I rewrite btrfs_trim_block_group() and break it into two functions. One is to trim extents only, and the other is to trim bitmaps only. Before patching: # fstrim -v /mnt/ /mnt/: 1496465408 bytes were trimmed After patching: # fstrim -v /mnt/ /mnt/: 2193768448 bytes were trimmed And this matches the total free space: # btrfs fi df /mnt Data: total=3.58GB, used=1.79GB System, DUP: total=8.00MB, used=4.00KB System: total=4.00MB, used=0.00 Metadata, DUP: total=205.12MB, used=97.14MB Metadata: total=8.00MB, used=0.00 Signed-off-by: Li Zefan --- fs/btrfs/free-space-cache.c | 235 +++++++++++++++++++++++++++++++------------- 1 file changed, 164 insertions(+), 71 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e4eb222147c..b3cbb8939fa 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2594,17 +2594,57 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) cluster->block_group = NULL; } -int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, - u64 *trimmed, u64 start, u64 end, u64 minlen) +static int do_trimming(struct btrfs_block_group_cache *block_group, + u64 *total_trimmed, u64 start, u64 bytes, + u64 reserved_start, u64 reserved_bytes) { - struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; - struct btrfs_free_space *entry = NULL; + struct btrfs_space_info *space_info = block_group->space_info; struct btrfs_fs_info *fs_info = block_group->fs_info; - u64 bytes = 0; - u64 actually_trimmed; - int ret = 0; + int ret; + int update = 0; + u64 trimmed = 0; - *trimmed = 0; + spin_lock(&space_info->lock); + spin_lock(&block_group->lock); + if (!block_group->ro) { + block_group->reserved += reserved_bytes; + space_info->bytes_reserved += reserved_bytes; + update = 1; + } + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); + + ret = btrfs_error_discard_extent(fs_info->extent_root, + start, bytes, &trimmed); + if (!ret) + *total_trimmed += trimmed; + + btrfs_add_free_space(block_group, reserved_start, reserved_bytes); + + if (update) { + spin_lock(&space_info->lock); + spin_lock(&block_group->lock); + if (block_group->ro) + space_info->bytes_readonly += reserved_bytes; + block_group->reserved -= reserved_bytes; + space_info->bytes_reserved -= reserved_bytes; + spin_unlock(&space_info->lock); + spin_unlock(&block_group->lock); + } + + return ret; +} + +static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, + u64 *total_trimmed, u64 start, u64 end, u64 minlen) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *entry; + struct rb_node *node; + int ret = 0; + u64 extent_start; + u64 extent_bytes; + u64 bytes; while (start < end) { spin_lock(&ctl->tree_lock); @@ -2615,81 +2655,118 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, } entry = tree_search_offset(ctl, start, 0, 1); - if (!entry) - entry = tree_search_offset(ctl, - offset_to_bitmap(ctl, start), - 1, 1); - - if (!entry || entry->offset >= end) { + if (!entry) { spin_unlock(&ctl->tree_lock); break; } - if (entry->bitmap) { - ret = search_bitmap(ctl, entry, &start, &bytes); - if (!ret) { - if (start >= end) { - spin_unlock(&ctl->tree_lock); - break; - } - bytes = min(bytes, end - start); - bitmap_clear_bits(ctl, entry, start, bytes); - if (entry->bytes == 0) 
- free_bitmap(ctl, entry); - } else { - start = entry->offset + BITS_PER_BITMAP * - block_group->sectorsize; + /* skip bitmaps */ + while (entry->bitmap) { + node = rb_next(&entry->offset_index); + if (!node) { spin_unlock(&ctl->tree_lock); - ret = 0; - continue; + goto out; } - } else { - start = entry->offset; - bytes = min(entry->bytes, end - start); - unlink_free_space(ctl, entry); - kmem_cache_free(btrfs_free_space_cachep, entry); + entry = rb_entry(node, struct btrfs_free_space, + offset_index); + } + + if (entry->offset >= end) { + spin_unlock(&ctl->tree_lock); + break; + } + + extent_start = entry->offset; + extent_bytes = entry->bytes; + start = max(start, extent_start); + bytes = min(extent_start + extent_bytes, end) - start; + if (bytes < minlen) { + spin_unlock(&ctl->tree_lock); + goto next; } + unlink_free_space(ctl, entry); + kmem_cache_free(btrfs_free_space_cachep, entry); + spin_unlock(&ctl->tree_lock); - if (bytes >= minlen) { - struct btrfs_space_info *space_info; - int update = 0; - - space_info = block_group->space_info; - spin_lock(&space_info->lock); - spin_lock(&block_group->lock); - if (!block_group->ro) { - block_group->reserved += bytes; - space_info->bytes_reserved += bytes; - update = 1; - } - spin_unlock(&block_group->lock); - spin_unlock(&space_info->lock); - - ret = btrfs_error_discard_extent(fs_info->extent_root, - start, - bytes, - &actually_trimmed); - - btrfs_add_free_space(block_group, start, bytes); - if (update) { - spin_lock(&space_info->lock); - spin_lock(&block_group->lock); - if (block_group->ro) - space_info->bytes_readonly += bytes; - block_group->reserved -= bytes; - space_info->bytes_reserved -= bytes; - spin_unlock(&space_info->lock); - spin_unlock(&block_group->lock); - } + ret = do_trimming(block_group, total_trimmed, start, bytes, + extent_start, extent_bytes); + if (ret) + break; +next: + start += bytes; - if (ret) - break; - *trimmed += actually_trimmed; + if (fatal_signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + cond_resched(); + } +out: + return ret; +} + +static int trim_bitmaps(struct btrfs_block_group_cache *block_group, + u64 *total_trimmed, u64 start, u64 end, u64 minlen) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *entry; + int ret = 0; + int ret2; + u64 bytes; + u64 offset = offset_to_bitmap(ctl, start); + + while (offset < end) { + bool next_bitmap = false; + + spin_lock(&ctl->tree_lock); + + if (ctl->free_space < minlen) { + spin_unlock(&ctl->tree_lock); + break; + } + + entry = tree_search_offset(ctl, offset, 1, 0); + if (!entry) { + spin_unlock(&ctl->tree_lock); + next_bitmap = true; + goto next; + } + + bytes = minlen; + ret2 = search_bitmap(ctl, entry, &start, &bytes); + if (ret2 || start >= end) { + spin_unlock(&ctl->tree_lock); + next_bitmap = true; + goto next; + } + + bytes = min(bytes, end - start); + if (bytes < minlen) { + spin_unlock(&ctl->tree_lock); + goto next; + } + + bitmap_clear_bits(ctl, entry, start, bytes); + if (entry->bytes == 0) + free_bitmap(ctl, entry); + + spin_unlock(&ctl->tree_lock); + + ret = do_trimming(block_group, total_trimmed, start, bytes, + start, bytes); + if (ret) + break; +next: + if (next_bitmap) { + offset += BITS_PER_BITMAP * ctl->unit; + } else { + start += bytes; + if (start >= offset + BITS_PER_BITMAP * ctl->unit) + offset += BITS_PER_BITMAP * ctl->unit; } - start += bytes; - bytes = 0; if (fatal_signal_pending(current)) { ret = -ERESTARTSYS; @@ -2702,6 +2779,22 @@ int btrfs_trim_block_group(struct 
btrfs_block_group_cache *block_group, return ret; } +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen) +{ + int ret; + + *trimmed = 0; + + ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); + if (ret) + return ret; + + ret = trim_bitmaps(block_group, trimmed, start, end, minlen); + + return ret; +} + /* * Find the left-most item in the cache tree, and then return the * smallest inode number in the item. -- cgit v1.2.3 From c7c144db531fda414e532adac56e965ce332e2a5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 7 Dec 2011 10:39:22 +0800 Subject: Btrfs: update global block_rsv when creating a new block group A bug was triggered while using seed device: # mkfs.btrfs /dev/loop1 # btrfstune -S 1 /dev/loop1 # mount -o /dev/loop1 /mnt # btrfs dev add /dev/loop2 /mnt btrfs: block rsv returned -28 ------------[ cut here ]------------ WARNING: at fs/btrfs/extent-tree.c:5969 btrfs_alloc_free_block+0x166/0x396 [btrfs]() ... Call Trace: ... [] btrfs_cow_block+0x101/0x147 [btrfs] [] btrfs_search_slot+0x1b8/0x55f [btrfs] [] btrfs_insert_empty_items+0x42/0x7f [btrfs] [] btrfs_insert_item+0x40/0x7e [btrfs] [] btrfs_make_block_group+0x243/0x2aa [btrfs] [] __btrfs_alloc_chunk+0x672/0x70e [btrfs] [] init_first_rw_device+0x77/0x13c [btrfs] [] btrfs_init_new_device+0x664/0x9fd [btrfs] [] btrfs_ioctl+0x694/0xdbe [btrfs] [] do_vfs_ioctl+0x496/0x4cc [] sys_ioctl+0x33/0x4f [] sysenter_do_call+0x12/0x38 ---[ end trace 906adac595facc7d ]--- Since seed device is readonly, there's no usable space in the filesystem. Afterwards we add a sprout device to it, and the kernel creates a METADATA block group and a SYSTEM block group where comes free space we can reserve, but we still get revervation failure because the global block_rsv hasn't been updated accordingly. Signed-off-by: Li Zefan --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5b53479ce07..bf30f670cda 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7446,6 +7446,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, &cache->space_info); BUG_ON(ret); + update_global_block_rsv(root->fs_info); spin_lock(&cache->space_info->lock); cache->space_info->bytes_readonly += cache->bytes_super; -- cgit v1.2.3 From b367e47fb3a70f5d24ebd6faf7d42436d485fb2d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 7 Dec 2011 11:38:24 +0800 Subject: Btrfs: fix possible deadlock when opening a seed device The correct lock order is uuid_mutex -> volume_mutex -> chunk_mutex, but when we mount a filesystem which has backing seed devices, we have this lock chain: open_ctree() lock(chunk_mutex); read_chunk_tree(); read_one_dev(); open_seed_devices(); lock(uuid_mutex); and then we hit a lockdep splat. 
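The fix follows directly from that ordering rule: rather than open_ctree() wrapping the call in chunk_mutex, btrfs_read_chunk_tree() now takes uuid_mutex first and then the chunk lock itself, and open_seed_devices() only asserts that uuid_mutex is already held. A schematic view of the fixed shape (walk_chunk_tree() is a hypothetical stand-in for the existing device/chunk item walk, which the patch does not change):

	int btrfs_read_chunk_tree(struct btrfs_root *root)
	{
		int ret;

		mutex_lock(&uuid_mutex);	/* outermost lock, taken first */
		lock_chunks(root);		/* then chunk_mutex */

		/*
		 * Read device items, then chunk items. read_one_dev() may
		 * reach open_seed_devices(), which after this patch merely
		 * does BUG_ON(!mutex_is_locked(&uuid_mutex)) instead of
		 * taking uuid_mutex and inverting the order.
		 */
		ret = walk_chunk_tree(root);

		unlock_chunks(root);
		mutex_unlock(&uuid_mutex);
		return ret;
	}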
Signed-off-by: Li Zefan --- fs/btrfs/disk-io.c | 2 -- fs/btrfs/volumes.c | 9 +++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3f9d5551e58..858ab347413 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2270,9 +2270,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE); - mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_chunk_tree(chunk_root); - mutex_unlock(&fs_info->chunk_mutex); if (ret) { printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", sb->s_id); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 563ef650850..fbb493b28d5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3506,7 +3506,7 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) struct btrfs_fs_devices *fs_devices; int ret; - mutex_lock(&uuid_mutex); + BUG_ON(!mutex_is_locked(&uuid_mutex)); fs_devices = root->fs_info->fs_devices->seed; while (fs_devices) { @@ -3544,7 +3544,6 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) fs_devices->seed = root->fs_info->fs_devices->seed; root->fs_info->fs_devices->seed = fs_devices; out: - mutex_unlock(&uuid_mutex); return ret; } @@ -3687,6 +3686,9 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) if (!path) return -ENOMEM; + mutex_lock(&uuid_mutex); + lock_chunks(root); + /* first we search for all of the device items, and then we * read in all of the chunk items. This way we can create chunk * mappings that reference all of the devices that are afound @@ -3737,6 +3739,9 @@ again: } ret = 0; error: + unlock_chunks(root); + mutex_unlock(&uuid_mutex); + btrfs_free_path(path); return ret; } -- cgit v1.2.3 From 4041bcdc7bef06a2fb29c57394c713a74bd13b08 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 22:20:12 -0500 Subject: autofs4: autofs4_wait() vs. autofs4_catatonic_mode() race We need to recheck ->catatonic after autofs4_wait() got ->wq_mutex for good, or we might end up with wq inserted into queue after autofs4_catatonic_mode() had done its thing. It will stick there forever, since there won't be anything to clear its ->name.name. A bit of a complication: validate_request() drops and regains ->wq_mutex. It actually ends up the most convenient place to stick the check into... 
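The general pattern here is the standard one for sleeping locks: any condition that can change while the mutex is dropped has to be re-tested once the mutex is re-acquired, before the caller commits to queueing a wait that nobody would ever complete. A simplified illustration (the real validate_request() intentionally returns with wq_mutex still held on -ENOENT so that autofs4_wait() can finish up under the lock; the unlock below exists only to keep the sketch self-contained):

	if (mutex_lock_interruptible(&sbi->wq_mutex))
		return -EINTR;

	/*
	 * wq_mutex was dropped while this task slept, so the daemon may
	 * have gone catatonic in the meantime; check again under the lock.
	 */
	if (sbi->catatonic) {
		mutex_unlock(&sbi->wq_mutex);	/* simplified, see caller */
		return -ENOENT;
	}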
Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/waitq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index e1fbdeef85d..c13273afd54 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -257,6 +257,9 @@ static int validate_request(struct autofs_wait_queue **wait, struct autofs_wait_queue *wq; struct autofs_info *ino; + if (sbi->catatonic) + return -ENOENT; + /* Wait in progress, continue; */ wq = autofs4_find_wait(sbi, qstr); if (wq) { @@ -289,6 +292,9 @@ static int validate_request(struct autofs_wait_queue **wait, if (mutex_lock_interruptible(&sbi->wq_mutex)) return -EINTR; + if (sbi->catatonic) + return -ENOENT; + wq = autofs4_find_wait(sbi, qstr); if (wq) { *wait = wq; @@ -389,7 +395,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, ret = validate_request(&wq, sbi, &qstr, dentry, notify); if (ret <= 0) { - if (ret == 0) + if (ret != -EINTR) mutex_unlock(&sbi->wq_mutex); kfree(qstr.name); return ret; -- cgit v1.2.3 From 8753333266be67ff3a984ac1f6566d31c260bee4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 22:24:48 -0500 Subject: autofs4: catatonic_mode vs. notify_daemon race we need to hold ->wq_mutex while we are forming the packet to send, lest we have autofs4_catatonic_mode() setting wq->name.name to NULL just as autofs4_notify_daemon() decides to memcpy() from it... We do have check for catatonic mode immediately after that (under ->wq_mutex, as it ought to be) and packet won't be actually sent, but it'll be too late for us if we oops on that memcpy() from NULL... Fix is obvious - just extend the area covered by ->wq_mutex over that switch and check whether it's catatonic *before* doing anything else. Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/waitq.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index c13273afd54..9a0256da5d5 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -110,6 +110,13 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pkt.hdr.proto_version = sbi->version; pkt.hdr.type = type; + mutex_lock(&sbi->wq_mutex); + + /* Check if we have become catatonic */ + if (sbi->catatonic) { + mutex_unlock(&sbi->wq_mutex); + return; + } switch (type) { /* Kernel protocol v4 missing and expire packets */ case autofs_ptype_missing: @@ -163,22 +170,18 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, } default: printk("autofs4_notify_daemon: bad type %d!\n", type); + mutex_unlock(&sbi->wq_mutex); return; } - /* Check if we have become catatonic */ - mutex_lock(&sbi->wq_mutex); - if (!sbi->catatonic) { - pipe = sbi->pipe; - get_file(pipe); - } + pipe = sbi->pipe; + get_file(pipe); + mutex_unlock(&sbi->wq_mutex); - if (pipe) { - if (autofs4_write(pipe, &pkt, pktsz)) - autofs4_catatonic_mode(sbi); - fput(pipe); - } + if (autofs4_write(pipe, &pkt, pktsz)) + autofs4_catatonic_mode(sbi); + fput(pipe); } static int autofs4_getpath(struct autofs_sb_info *sbi, -- cgit v1.2.3 From d668dc56631da067540b2494d2a1f29ff7b5f15a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 22:35:38 -0500 Subject: autofs4: deal with autofs4_write/autofs4_write races Just serialize the actual writing of packets into pipe on a new mutex, independent from everything else in the locking hierarchy. 
As soon as something has started feeding a piece of packet into the pipe to daemon, we *want* everything else about to try the same to wait until we are done. Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 1 + fs/autofs4/inode.c | 1 + fs/autofs4/waitq.c | 9 +++++---- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 5869d4e974a..d8d8e7ba6a1 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -116,6 +116,7 @@ struct autofs_sb_info { int needs_reghost; struct super_block *sb; struct mutex wq_mutex; + struct mutex pipe_mutex; spinlock_t fs_lock; struct autofs_wait_queue *queues; /* Wait queue pointer */ spinlock_t lookup_lock; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 2ba44c79d54..e16980b00b8 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -225,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->min_proto = 0; sbi->max_proto = 0; mutex_init(&sbi->wq_mutex); + mutex_init(&sbi->pipe_mutex); spin_lock_init(&sbi->fs_lock); sbi->queues = NULL; spin_lock_init(&sbi->lookup_lock); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 9a0256da5d5..9ef5b291440 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -56,26 +56,27 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) mutex_unlock(&sbi->wq_mutex); } -static int autofs4_write(struct file *file, const void *addr, int bytes) +static int autofs4_write(struct autofs_sb_info *sbi, + struct file *file, const void *addr, int bytes) { unsigned long sigpipe, flags; mm_segment_t fs; const char *data = (const char *)addr; ssize_t wr = 0; - /** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/ - sigpipe = sigismember(&current->pending.signal, SIGPIPE); /* Save pointer to user space and point back to kernel space */ fs = get_fs(); set_fs(KERNEL_DS); + mutex_lock(&sbi->pipe_mutex); while (bytes && (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) { data += wr; bytes -= wr; } + mutex_unlock(&sbi->pipe_mutex); set_fs(fs); @@ -179,7 +180,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, mutex_unlock(&sbi->wq_mutex); - if (autofs4_write(pipe, &pkt, pktsz)) + if (autofs4_write(sbi, pipe, &pkt, pktsz)) autofs4_catatonic_mode(sbi); fput(pipe); } -- cgit v1.2.3 From e0c2a9aa1e68455dc3439e95d85cabcaff073666 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 9 Jan 2012 17:18:05 -0500 Subject: GFS2: dlm based recovery coordination This new method of managing recovery is an alternative to the previous approach of using the userland gfs_controld.
- use dlm slot numbers to assign journal id's - use dlm recovery callbacks to initiate journal recovery - use a dlm lock to determine the first node to mount fs - use a dlm lock to track journals that need recovery Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 2 +- fs/gfs2/glock.h | 7 +- fs/gfs2/incore.h | 58 ++- fs/gfs2/lock_dlm.c | 993 +++++++++++++++++++++++++++++++++++++++++++- fs/gfs2/main.c | 10 + fs/gfs2/ops_fstype.c | 29 +- fs/gfs2/recovery.c | 4 + fs/gfs2/sys.c | 33 +- fs/gfs2/sys.h | 2 + include/linux/gfs2_ondisk.h | 2 + 10 files changed, 1098 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 88e8a23d002..376816fcd04 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1353,7 +1353,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) spin_lock(&gl->gl_spin); gl->gl_reply = ret; - if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { + if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { if (gfs2_should_freeze(gl)) { set_bit(GLF_FROZEN, &gl->gl_flags); spin_unlock(&gl->gl_spin); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2553b858a72..307ac31df78 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -121,8 +121,11 @@ enum { struct lm_lockops { const char *lm_proto_name; - int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); - void (*lm_unmount) (struct gfs2_sbd *sdp); + int (*lm_mount) (struct gfs2_sbd *sdp, const char *table); + void (*lm_first_done) (struct gfs2_sbd *sdp); + void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid, + unsigned int result); + void (*lm_unmount) (struct gfs2_sbd *sdp); void (*lm_withdraw) (struct gfs2_sbd *sdp); void (*lm_put_lock) (struct gfs2_glock *gl); int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index e1d3bb59945..b9422bc8e2f 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -139,8 +139,45 @@ struct gfs2_bufdata { #define GDLM_STRNAME_BYTES 25 #define GDLM_LVB_SIZE 32 +/* + * ls_recover_flags: + * + * DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been + * held by failed nodes whose journals need recovery. Those locks should + * only be used for journal recovery until the journal recovery is done. + * This is set by the dlm recover_prep callback and cleared by the + * gfs2_control thread when journal recovery is complete. To avoid + * races between recover_prep setting and gfs2_control clearing, recover_spin + * is held while changing this bit and reading/writing recover_block + * and recover_start. + * + * DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used. + * + * DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing + * recovery of all journals before allowing other nodes to mount the fs. + * This is cleared when FIRST_MOUNT_DONE is set. + * + * DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished + * recovery of all journals, and now allows other nodes to mount the fs. + * + * DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared + * BLOCK_LOCKS for the first time. The gfs2_control thread should now + * control clearing BLOCK_LOCKS for further recoveries. + * + * DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq. + * + * DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep() + * and recover_done(), i.e. set while recover_block == recover_start. 
+ */ + enum { DFL_BLOCK_LOCKS = 0, + DFL_NO_DLM_OPS = 1, + DFL_FIRST_MOUNT = 2, + DFL_FIRST_MOUNT_DONE = 3, + DFL_MOUNT_DONE = 4, + DFL_UNMOUNT = 5, + DFL_DLM_RECOVERY = 6, }; struct lm_lockname { @@ -499,14 +536,26 @@ struct gfs2_sb_host { struct lm_lockstruct { int ls_jid; unsigned int ls_first; - unsigned int ls_first_done; unsigned int ls_nodir; const struct lm_lockops *ls_ops; - unsigned long ls_flags; dlm_lockspace_t *ls_dlm; - int ls_recover_jid_done; - int ls_recover_jid_status; + int ls_recover_jid_done; /* These two are deprecated, */ + int ls_recover_jid_status; /* used previously by gfs_controld */ + + struct dlm_lksb ls_mounted_lksb; /* mounted_lock */ + struct dlm_lksb ls_control_lksb; /* control_lock */ + char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */ + struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ + + spinlock_t ls_recover_spin; /* protects following fields */ + unsigned long ls_recover_flags; /* DFL_ */ + uint32_t ls_recover_mount; /* gen in first recover_done cb */ + uint32_t ls_recover_start; /* gen in last recover_done cb */ + uint32_t ls_recover_block; /* copy recover_start in last recover_prep */ + uint32_t ls_recover_size; /* size of recover_submit, recover_result */ + uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */ + uint32_t *ls_recover_result; /* result of last jid recovery */ }; struct gfs2_sbd { @@ -544,6 +593,7 @@ struct gfs2_sbd { wait_queue_head_t sd_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; + struct delayed_work sd_control_work; /* Inode Stuff */ diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index ce85b62bc0a..8944d1e32ab 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. + * Copyright 2004-2011 Red Hat, Inc. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -11,12 +11,15 @@ #include #include #include +#include #include #include "incore.h" #include "glock.h" #include "util.h" +#include "sys.h" +extern struct workqueue_struct *gfs2_control_wq; static void gdlm_ast(void *arg) { @@ -185,34 +188,1002 @@ static void gdlm_cancel(struct gfs2_glock *gl) dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); } -static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname) +/* + * dlm/gfs2 recovery coordination using dlm_recover callbacks + * + * 1. dlm_controld sees lockspace members change + * 2. dlm_controld blocks dlm-kernel locking activity + * 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep) + * 4. dlm_controld starts and finishes its own user level recovery + * 5. dlm_controld starts dlm-kernel dlm_recoverd to do kernel recovery + * 6. dlm_recoverd notifies gfs2 of failed nodes (recover_slot) + * 7. dlm_recoverd does its own lock recovery + * 8. dlm_recoverd unblocks dlm-kernel locking activity + * 9. dlm_recoverd notifies gfs2 when done (recover_done with new generation) + * 10. gfs2_control updates control_lock lvb with new generation and jid bits + * 11. gfs2_control enqueues journals for gfs2_recover to recover (maybe none) + * 12. gfs2_recover dequeues and recovers journals of failed nodes + * 13. gfs2_recover provides recovery results to gfs2_control (recovery_result) + * 14. gfs2_control updates control_lock lvb jid bits for recovered journals + * 15. 
gfs2_control unblocks normal locking when all journals are recovered + * + * - failures during recovery + * + * recover_prep() may set BLOCK_LOCKS (step 3) again before gfs2_control + * clears BLOCK_LOCKS (step 15), e.g. another node fails while still + * recovering for a prior failure. gfs2_control needs a way to detect + * this so it can leave BLOCK_LOCKS set in step 15. This is managed using + * the recover_block and recover_start values. + * + * recover_done() provides a new lockspace generation number each time it + * is called (step 9). This generation number is saved as recover_start. + * When recover_prep() is called, it sets BLOCK_LOCKS and sets + * recover_block = recover_start. So, while recover_block is equal to + * recover_start, BLOCK_LOCKS should remain set. (recover_spin must + * be held around the BLOCK_LOCKS/recover_block/recover_start logic.) + * + * - more specific gfs2 steps in sequence above + * + * 3. recover_prep sets BLOCK_LOCKS and sets recover_block = recover_start + * 6. recover_slot records any failed jids (maybe none) + * 9. recover_done sets recover_start = new generation number + * 10. gfs2_control sets control_lock lvb = new gen + bits for failed jids + * 12. gfs2_recover does journal recoveries for failed jids identified above + * 14. gfs2_control clears control_lock lvb bits for recovered jids + * 15. gfs2_control checks if recover_block == recover_start (step 3 occured + * again) then do nothing, otherwise if recover_start > recover_block + * then clear BLOCK_LOCKS. + * + * - parallel recovery steps across all nodes + * + * All nodes attempt to update the control_lock lvb with the new generation + * number and jid bits, but only the first to get the control_lock EX will + * do so; others will see that it's already done (lvb already contains new + * generation number.) + * + * . All nodes get the same recover_prep/recover_slot/recover_done callbacks + * . All nodes attempt to set control_lock lvb gen + bits for the new gen + * . One node gets control_lock first and writes the lvb, others see it's done + * . All nodes attempt to recover jids for which they see control_lock bits set + * . One node succeeds for a jid, and that one clears the jid bit in the lvb + * . All nodes will eventually see all lvb bits clear and unblock locks + * + * - is there a problem with clearing an lvb bit that should be set + * and missing a journal recovery? + * + * 1. jid fails + * 2. lvb bit set for step 1 + * 3. jid recovered for step 1 + * 4. jid taken again (new mount) + * 5. jid fails (for step 4) + * 6. lvb bit set for step 5 (will already be set) + * 7. lvb bit cleared for step 3 + * + * This is not a problem because the failure in step 5 does not + * require recovery, because the mount in step 4 could not have + * progressed far enough to unblock locks and access the fs. The + * control_mount() function waits for all recoveries to be complete + * for the latest lockspace generation before ever unblocking locks + * and returning. The mount in step 4 waits until the recovery in + * step 1 is done. + * + * - special case of first mounter: first node to mount the fs + * + * The first node to mount a gfs2 fs needs to check all the journals + * and recover any that need recovery before other nodes are allowed + * to mount the fs. (Others may begin mounting, but they must wait + * for the first mounter to be done before taking locks on the fs + * or accessing the fs.) This has two parts: + * + * 1. The mounted_lock tells a node it's the first to mount the fs. 
+ * Each node holds the mounted_lock in PR while it's mounted. + * Each node tries to acquire the mounted_lock in EX when it mounts. + * If a node is granted the mounted_lock EX it means there are no + * other mounted nodes (no PR locks exist), and it is the first mounter. + * The mounted_lock is demoted to PR when first recovery is done, so + * others will fail to get an EX lock, but will get a PR lock. + * + * 2. The control_lock blocks others in control_mount() while the first + * mounter is doing first mount recovery of all journals. + * A mounting node needs to acquire control_lock in EX mode before + * it can proceed. The first mounter holds control_lock in EX while doing + * the first mount recovery, blocking mounts from other nodes, then demotes + * control_lock to NL when it's done (others_may_mount/first_done), + * allowing other nodes to continue mounting. + * + * first mounter: + * control_lock EX/NOQUEUE success + * mounted_lock EX/NOQUEUE success (no other PR, so no other mounters) + * set first=1 + * do first mounter recovery + * mounted_lock EX->PR + * control_lock EX->NL, write lvb generation + * + * other mounter: + * control_lock EX/NOQUEUE success (if fail -EAGAIN, retry) + * mounted_lock EX/NOQUEUE fail -EAGAIN (expected due to other mounters PR) + * mounted_lock PR/NOQUEUE success + * read lvb generation + * control_lock EX->NL + * set first=0 + * + * - mount during recovery + * + * If a node mounts while others are doing recovery (not first mounter), + * the mounting node will get its initial recover_done() callback without + * having seen any previous failures/callbacks. + * + * It must wait for all recoveries preceding its mount to be finished + * before it unblocks locks. It does this by repeating the "other mounter" + * steps above until the lvb generation number is >= its mount generation + * number (from initial recover_done) and all lvb bits are clear. + * + * - control_lock lvb format + * + * 4 bytes generation number: the latest dlm lockspace generation number + * from recover_done callback. Indicates the jid bitmap has been updated + * to reflect all slot failures through that generation. + * 4 bytes unused. + * GDLM_LVB_SIZE-8 bytes of jid bit map. If bit N is set, it indicates + * that jid N needs recovery. 
+ */ + +#define JID_BITMAP_OFFSET 8 /* 4 byte generation number + 4 byte unused */ + +static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, + char *lvb_bits) +{ + uint32_t gen; + memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); + memcpy(&gen, lvb_bits, sizeof(uint32_t)); + *lvb_gen = le32_to_cpu(gen); +} + +static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, + char *lvb_bits) +{ + uint32_t gen; + memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); + gen = cpu_to_le32(lvb_gen); + memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); +} + +static int all_jid_bits_clear(char *lvb) +{ + int i; + for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) { + if (lvb[i]) + return 0; + } + return 1; +} + +static void sync_wait_cb(void *arg) +{ + struct lm_lockstruct *ls = arg; + complete(&ls->ls_sync_wait); +} + +static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; - if (fsname == NULL) { - fs_info(sdp, "no fsname found\n"); - return -EINVAL; + error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls); + if (error) { + fs_err(sdp, "%s lkid %x error %d\n", + name, lksb->sb_lkid, error); + return error; + } + + wait_for_completion(&ls->ls_sync_wait); + + if (lksb->sb_status != -DLM_EUNLOCK) { + fs_err(sdp, "%s lkid %x status %d\n", + name, lksb->sb_lkid, lksb->sb_status); + return -1; + } + return 0; +} + +static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags, + unsigned int num, struct dlm_lksb *lksb, char *name) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char strname[GDLM_STRNAME_BYTES]; + int error, status; + + memset(strname, 0, GDLM_STRNAME_BYTES); + snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num); + + error = dlm_lock(ls->ls_dlm, mode, lksb, flags, + strname, GDLM_STRNAME_BYTES - 1, + 0, sync_wait_cb, ls, NULL); + if (error) { + fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n", + name, lksb->sb_lkid, flags, mode, error); + return error; + } + + wait_for_completion(&ls->ls_sync_wait); + + status = lksb->sb_status; + + if (status && status != -EAGAIN) { + fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n", + name, lksb->sb_lkid, flags, mode, status); + } + + return status; +} + +static int mounted_unlock(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock"); +} + +static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK, + &ls->ls_mounted_lksb, "mounted_lock"); +} + +static int control_unlock(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock"); +} + +static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK, + &ls->ls_control_lksb, "control_lock"); +} + +static void gfs2_control_func(struct work_struct *work) +{ + struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char lvb_bits[GDLM_LVB_SIZE]; + uint32_t block_gen, start_gen, lvb_gen, flags; + int recover_set = 0; + int write_lvb = 0; + int recover_size; + int i, error; + + spin_lock(&ls->ls_recover_spin); + /* + * No MOUNT_DONE means we're still mounting; control_mount() + 
* will set this flag, after which this thread will take over + * all further clearing of BLOCK_LOCKS. + * + * FIRST_MOUNT means this node is doing first mounter recovery, + * for which recovery control is handled by + * control_mount()/control_first_done(), not this thread. + */ + if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || + test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + spin_unlock(&ls->ls_recover_spin); + return; + } + block_gen = ls->ls_recover_block; + start_gen = ls->ls_recover_start; + spin_unlock(&ls->ls_recover_spin); + + /* + * Equal block_gen and start_gen implies we are between + * recover_prep and recover_done callbacks, which means + * dlm recovery is in progress and dlm locking is blocked. + * There's no point trying to do any work until recover_done. + */ + + if (block_gen == start_gen) + return; + + /* + * Propagate recover_submit[] and recover_result[] to lvb: + * dlm_recoverd adds to recover_submit[] jids needing recovery + * gfs2_recover adds to recover_result[] journal recovery results + * + * set lvb bit for jids in recover_submit[] if the lvb has not + * yet been updated for the generation of the failure + * + * clear lvb bit for jids in recover_result[] if the result of + * the journal recovery is SUCCESS + */ + + error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK); + if (error) { + fs_err(sdp, "control lock EX error %d\n", error); + return; + } + + control_lvb_read(ls, &lvb_gen, lvb_bits); + + spin_lock(&ls->ls_recover_spin); + if (block_gen != ls->ls_recover_block || + start_gen != ls->ls_recover_start) { + fs_info(sdp, "recover generation %u block1 %u %u\n", + start_gen, block_gen, ls->ls_recover_block); + spin_unlock(&ls->ls_recover_spin); + control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); + return; + } + + recover_size = ls->ls_recover_size; + + if (lvb_gen <= start_gen) { + /* + * Clear lvb bits for jids we've successfully recovered. + * Because all nodes attempt to recover failed journals, + * a journal can be recovered multiple times successfully + * in succession. Only the first will really do recovery, + * the others find it clean, but still report a successful + * recovery. So, another node may have already recovered + * the jid and cleared the lvb bit for it. + */ + for (i = 0; i < recover_size; i++) { + if (ls->ls_recover_result[i] != LM_RD_SUCCESS) + continue; + + ls->ls_recover_result[i] = 0; + + if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) + continue; + + __clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); + write_lvb = 1; + } + } + + if (lvb_gen == start_gen) { + /* + * Failed slots before start_gen are already set in lvb. + */ + for (i = 0; i < recover_size; i++) { + if (!ls->ls_recover_submit[i]) + continue; + if (ls->ls_recover_submit[i] < lvb_gen) + ls->ls_recover_submit[i] = 0; + } + } else if (lvb_gen < start_gen) { + /* + * Failed slots before start_gen are not yet set in lvb. 
+ */ + for (i = 0; i < recover_size; i++) { + if (!ls->ls_recover_submit[i]) + continue; + if (ls->ls_recover_submit[i] < start_gen) { + ls->ls_recover_submit[i] = 0; + __set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); + } + } + /* even if there are no bits to set, we need to write the + latest generation to the lvb */ + write_lvb = 1; + } else { + /* + * we should be getting a recover_done() for lvb_gen soon + */ + } + spin_unlock(&ls->ls_recover_spin); + + if (write_lvb) { + control_lvb_write(ls, start_gen, lvb_bits); + flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK; + } else { + flags = DLM_LKF_CONVERT; + } + + error = control_lock(sdp, DLM_LOCK_NL, flags); + if (error) { + fs_err(sdp, "control lock NL error %d\n", error); + return; + } + + /* + * Everyone will see jid bits set in the lvb, run gfs2_recover_set(), + * and clear a jid bit in the lvb if the recovery is a success. + * Eventually all journals will be recovered, all jid bits will + * be cleared in the lvb, and everyone will clear BLOCK_LOCKS. + */ + + for (i = 0; i < recover_size; i++) { + if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) { + fs_info(sdp, "recover generation %u jid %d\n", + start_gen, i); + gfs2_recover_set(sdp, i); + recover_set++; + } + } + if (recover_set) + return; + + /* + * No more jid bits set in lvb, all recovery is done, unblock locks + * (unless a new recover_prep callback has occured blocking locks + * again while working above) + */ + + spin_lock(&ls->ls_recover_spin); + if (ls->ls_recover_block == block_gen && + ls->ls_recover_start == start_gen) { + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + fs_info(sdp, "recover generation %u done\n", start_gen); + gfs2_glock_thaw(sdp); + } else { + fs_info(sdp, "recover generation %u block2 %u %u\n", + start_gen, block_gen, ls->ls_recover_block); + spin_unlock(&ls->ls_recover_spin); + } +} + +static int control_mount(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char lvb_bits[GDLM_LVB_SIZE]; + uint32_t start_gen, block_gen, mount_gen, lvb_gen; + int mounted_mode; + int retries = 0; + int error; + + memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb)); + memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb)); + memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE); + ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb; + init_completion(&ls->ls_sync_wait); + + set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + + error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK); + if (error) { + fs_err(sdp, "control_mount control_lock NL error %d\n", error); + return error; + } + + error = mounted_lock(sdp, DLM_LOCK_NL, 0); + if (error) { + fs_err(sdp, "control_mount mounted_lock NL error %d\n", error); + control_unlock(sdp); + return error; + } + mounted_mode = DLM_LOCK_NL; + +restart: + if (retries++ && signal_pending(current)) { + error = -EINTR; + goto fail; + } + + /* + * We always start with both locks in NL. control_lock is + * demoted to NL below so we don't need to do it here. + */ + + if (mounted_mode != DLM_LOCK_NL) { + error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); + if (error) + goto fail; + mounted_mode = DLM_LOCK_NL; + } + + /* + * Other nodes need to do some work in dlm recovery and gfs2_control + * before the recover_done and control_lock will be ready for us below. + * A delay here is not required but often avoids having to retry. + */ + + msleep_interruptible(500); + + /* + * Acquire control_lock in EX and mounted_lock in either EX or PR. 
+ * control_lock lvb keeps track of any pending journal recoveries. + * mounted_lock indicates if any other nodes have the fs mounted. + */ + + error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK); + if (error == -EAGAIN) { + goto restart; + } else if (error) { + fs_err(sdp, "control_mount control_lock EX error %d\n", error); + goto fail; + } + + error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE); + if (!error) { + mounted_mode = DLM_LOCK_EX; + goto locks_done; + } else if (error != -EAGAIN) { + fs_err(sdp, "control_mount mounted_lock EX error %d\n", error); + goto fail; + } + + error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE); + if (!error) { + mounted_mode = DLM_LOCK_PR; + goto locks_done; + } else { + /* not even -EAGAIN should happen here */ + fs_err(sdp, "control_mount mounted_lock PR error %d\n", error); + goto fail; + } + +locks_done: + /* + * If we got both locks above in EX, then we're the first mounter. + * If not, then we need to wait for the control_lock lvb to be + * updated by other mounted nodes to reflect our mount generation. + * + * In simple first mounter cases, first mounter will see zero lvb_gen, + * but in cases where all existing nodes leave/fail before mounting + * nodes finish control_mount, then all nodes will be mounting and + * lvb_gen will be non-zero. + */ + + control_lvb_read(ls, &lvb_gen, lvb_bits); + + if (lvb_gen == 0xFFFFFFFF) { + /* special value to force mount attempts to fail */ + fs_err(sdp, "control_mount control_lock disabled\n"); + error = -EINVAL; + goto fail; + } + + if (mounted_mode == DLM_LOCK_EX) { + /* first mounter, keep both EX while doing first recovery */ + spin_lock(&ls->ls_recover_spin); + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags); + set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + fs_info(sdp, "first mounter control generation %u\n", lvb_gen); + return 0; + } + + error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); + if (error) + goto fail; + + /* + * We are not first mounter, now we need to wait for the control_lock + * lvb generation to be >= the generation from our first recover_done + * and all lvb bits to be clear (no pending journal recoveries.) 
+ */ + + if (!all_jid_bits_clear(lvb_bits)) { + /* journals need recovery, wait until all are clear */ + fs_info(sdp, "control_mount wait for journal recovery\n"); + goto restart; + } + + spin_lock(&ls->ls_recover_spin); + block_gen = ls->ls_recover_block; + start_gen = ls->ls_recover_start; + mount_gen = ls->ls_recover_mount; + + if (lvb_gen < mount_gen) { + /* wait for mounted nodes to update control_lock lvb to our + generation, which might include new recovery bits set */ + fs_info(sdp, "control_mount wait1 block %u start %u mount %u " + "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, + lvb_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + goto restart; + } + + if (lvb_gen != start_gen) { + /* wait for mounted nodes to update control_lock lvb to the + latest recovery generation */ + fs_info(sdp, "control_mount wait2 block %u start %u mount %u " + "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, + lvb_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + goto restart; + } + + if (block_gen == start_gen) { + /* dlm recovery in progress, wait for it to finish */ + fs_info(sdp, "control_mount wait3 block %u start %u mount %u " + "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, + lvb_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + goto restart; } - error = dlm_new_lockspace(fsname, NULL, - DLM_LSFL_FS | DLM_LSFL_NEWEXCL | - (ls->ls_nodir ? DLM_LSFL_NODIR : 0), - GDLM_LVB_SIZE, NULL, NULL, NULL, &ls->ls_dlm); + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags); + memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t)); + memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); + spin_unlock(&ls->ls_recover_spin); + return 0; + +fail: + mounted_unlock(sdp); + control_unlock(sdp); + return error; +} + +static int dlm_recovery_wait(void *word) +{ + schedule(); + return 0; +} + +static int control_first_done(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char lvb_bits[GDLM_LVB_SIZE]; + uint32_t start_gen, block_gen; + int error; + +restart: + spin_lock(&ls->ls_recover_spin); + start_gen = ls->ls_recover_start; + block_gen = ls->ls_recover_block; + + if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) || + !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || + !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + /* sanity check, should not happen */ + fs_err(sdp, "control_first_done start %u block %u flags %lx\n", + start_gen, block_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + control_unlock(sdp); + return -1; + } + + if (start_gen == block_gen) { + /* + * Wait for the end of a dlm recovery cycle to switch from + * first mounter recovery. We can ignore any recover_slot + * callbacks between the recover_prep and next recover_done + * because we are still the first mounter and any failed nodes + * have not fully mounted, so they don't need recovery. 
+ */ + spin_unlock(&ls->ls_recover_spin); + fs_info(sdp, "control_first_done wait gen %u\n", start_gen); + + wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY, + dlm_recovery_wait, TASK_UNINTERRUPTIBLE); + goto restart; + } + + clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); + set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags); + memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t)); + memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); + spin_unlock(&ls->ls_recover_spin); + + memset(lvb_bits, 0, sizeof(lvb_bits)); + control_lvb_write(ls, start_gen, lvb_bits); + + error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT); + if (error) + fs_err(sdp, "control_first_done mounted PR error %d\n", error); + + error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK); if (error) - printk(KERN_ERR "dlm_new_lockspace error %d", error); + fs_err(sdp, "control_first_done control NL error %d\n", error); return error; } +/* + * Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC) + * to accomodate the largest slot number. (NB dlm slot numbers start at 1, + * gfs2 jids start at 0, so jid = slot - 1) + */ + +#define RECOVER_SIZE_INC 16 + +static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, + int num_slots) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + uint32_t *submit = NULL; + uint32_t *result = NULL; + uint32_t old_size, new_size; + int i, max_jid; + + max_jid = 0; + for (i = 0; i < num_slots; i++) { + if (max_jid < slots[i].slot - 1) + max_jid = slots[i].slot - 1; + } + + old_size = ls->ls_recover_size; + + if (old_size >= max_jid + 1) + return 0; + + new_size = old_size + RECOVER_SIZE_INC; + + submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); + result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); + if (!submit || !result) { + kfree(submit); + kfree(result); + return -ENOMEM; + } + + spin_lock(&ls->ls_recover_spin); + memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t)); + memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t)); + kfree(ls->ls_recover_submit); + kfree(ls->ls_recover_result); + ls->ls_recover_submit = submit; + ls->ls_recover_result = result; + ls->ls_recover_size = new_size; + spin_unlock(&ls->ls_recover_spin); + return 0; +} + +static void free_recover_size(struct lm_lockstruct *ls) +{ + kfree(ls->ls_recover_submit); + kfree(ls->ls_recover_result); + ls->ls_recover_submit = NULL; + ls->ls_recover_result = NULL; + ls->ls_recover_size = 0; +} + +/* dlm calls before it does lock recovery */ + +static void gdlm_recover_prep(void *arg) +{ + struct gfs2_sbd *sdp = arg; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + + spin_lock(&ls->ls_recover_spin); + ls->ls_recover_block = ls->ls_recover_start; + set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); + + if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || + test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + spin_unlock(&ls->ls_recover_spin); + return; + } + set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); +} + +/* dlm calls after recover_prep has been completed on all lockspace members; + identifies slot/jid of failed member */ + +static void gdlm_recover_slot(void *arg, struct dlm_slot *slot) +{ + struct gfs2_sbd *sdp = arg; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + int jid = slot->slot - 1; + + spin_lock(&ls->ls_recover_spin); + if (ls->ls_recover_size < jid + 1) { + fs_err(sdp, "recover_slot jid %d gen %u short size %d", + jid, 
ls->ls_recover_block, ls->ls_recover_size); + spin_unlock(&ls->ls_recover_spin); + return; + } + + if (ls->ls_recover_submit[jid]) { + fs_info(sdp, "recover_slot jid %d gen %u prev %u", + jid, ls->ls_recover_block, ls->ls_recover_submit[jid]); + } + ls->ls_recover_submit[jid] = ls->ls_recover_block; + spin_unlock(&ls->ls_recover_spin); +} + +/* dlm calls after recover_slot and after it completes lock recovery */ + +static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots, + int our_slot, uint32_t generation) +{ + struct gfs2_sbd *sdp = arg; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + + /* ensure the ls jid arrays are large enough */ + set_recover_size(sdp, slots, num_slots); + + spin_lock(&ls->ls_recover_spin); + ls->ls_recover_start = generation; + + if (!ls->ls_recover_mount) { + ls->ls_recover_mount = generation; + ls->ls_jid = our_slot - 1; + } + + if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) + queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); + + clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY); + spin_unlock(&ls->ls_recover_spin); +} + +/* gfs2_recover thread has a journal recovery result */ + +static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid, + unsigned int result) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + + if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) + return; + + /* don't care about the recovery of own journal during mount */ + if (jid == ls->ls_jid) + return; + + spin_lock(&ls->ls_recover_spin); + if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { + spin_unlock(&ls->ls_recover_spin); + return; + } + if (ls->ls_recover_size < jid + 1) { + fs_err(sdp, "recovery_result jid %d short size %d", + jid, ls->ls_recover_size); + spin_unlock(&ls->ls_recover_spin); + return; + } + + fs_info(sdp, "recover jid %d result %s\n", jid, + result == LM_RD_GAVEUP ? "busy" : "success"); + + ls->ls_recover_result[jid] = result; + + /* GAVEUP means another node is recovering the journal; delay our + next attempt to recover it, to give the other node a chance to + finish before trying again */ + + if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) + queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, + result == LM_RD_GAVEUP ? 
HZ : 0); + spin_unlock(&ls->ls_recover_spin); +} + +const struct dlm_lockspace_ops gdlm_lockspace_ops = { + .recover_prep = gdlm_recover_prep, + .recover_slot = gdlm_recover_slot, + .recover_done = gdlm_recover_done, +}; + +static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + char cluster[GFS2_LOCKNAME_LEN]; + const char *fsname; + uint32_t flags; + int error, ops_result; + + /* + * initialize everything + */ + + INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func); + spin_lock_init(&ls->ls_recover_spin); + ls->ls_recover_flags = 0; + ls->ls_recover_mount = 0; + ls->ls_recover_start = 0; + ls->ls_recover_block = 0; + ls->ls_recover_size = 0; + ls->ls_recover_submit = NULL; + ls->ls_recover_result = NULL; + + error = set_recover_size(sdp, NULL, 0); + if (error) + goto fail; + + /* + * prepare dlm_new_lockspace args + */ + + fsname = strchr(table, ':'); + if (!fsname) { + fs_info(sdp, "no fsname found\n"); + error = -EINVAL; + goto fail_free; + } + memset(cluster, 0, sizeof(cluster)); + memcpy(cluster, table, strlen(table) - strlen(fsname)); + fsname++; + + flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL; + if (ls->ls_nodir) + flags |= DLM_LSFL_NODIR; + + /* + * create/join lockspace + */ + + error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE, + &gdlm_lockspace_ops, sdp, &ops_result, + &ls->ls_dlm); + if (error) { + fs_err(sdp, "dlm_new_lockspace error %d\n", error); + goto fail_free; + } + + if (ops_result < 0) { + /* + * dlm does not support ops callbacks, + * old dlm_controld/gfs_controld are used, try without ops. + */ + fs_info(sdp, "dlm lockspace ops not used\n"); + free_recover_size(ls); + set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags); + return 0; + } + + if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) { + fs_err(sdp, "dlm lockspace ops disallow jid preset\n"); + error = -EINVAL; + goto fail_release; + } + + /* + * control_mount() uses control_lock to determine first mounter, + * and for later mounts, waits for any recoveries to be cleared. 
+ */ + + error = control_mount(sdp); + if (error) { + fs_err(sdp, "mount control error %d\n", error); + goto fail_release; + } + + ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); + clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); + return 0; + +fail_release: + dlm_release_lockspace(ls->ls_dlm, 2); +fail_free: + free_recover_size(ls); +fail: + return error; +} + +static void gdlm_first_done(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + int error; + + if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) + return; + + error = control_first_done(sdp); + if (error) + fs_err(sdp, "mount first_done error %d\n", error); +} + static void gdlm_unmount(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; + if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) + goto release; + + /* wait for gfs2_control_wq to be done with this mount */ + + spin_lock(&ls->ls_recover_spin); + set_bit(DFL_UNMOUNT, &ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); + flush_delayed_work_sync(&sdp->sd_control_work); + + /* mounted_lock and control_lock will be purged in dlm recovery */ +release: if (ls->ls_dlm) { dlm_release_lockspace(ls->ls_dlm, 2); ls->ls_dlm = NULL; } + + free_recover_size(ls); } static const match_table_t dlm_tokens = { @@ -226,6 +1197,8 @@ static const match_table_t dlm_tokens = { const struct lm_lockops gfs2_dlm_ops = { .lm_proto_name = "lock_dlm", .lm_mount = gdlm_mount, + .lm_first_done = gdlm_first_done, + .lm_recovery_result = gdlm_recovery_result, .lm_unmount = gdlm_unmount, .lm_put_lock = gdlm_put_lock, .lm_lock = gdlm_lock, diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index c150298e2d8..a8d9bcd0e19 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -28,6 +28,8 @@ #include "recovery.h" #include "dir.h" +struct workqueue_struct *gfs2_control_wq; + static struct shrinker qd_shrinker = { .shrink = gfs2_shrink_qd_memory, .seeks = DEFAULT_SEEKS, @@ -146,12 +148,19 @@ static int __init init_gfs2_fs(void) if (!gfs_recovery_wq) goto fail_wq; + gfs2_control_wq = alloc_workqueue("gfs2_control", + WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0); + if (!gfs2_control_wq) + goto fail_control; + gfs2_register_debugfs(); printk("GFS2 installed\n"); return 0; +fail_control: + destroy_workqueue(gfs_recovery_wq); fail_wq: unregister_filesystem(&gfs2meta_fs_type); fail_unregister: @@ -195,6 +204,7 @@ static void __exit exit_gfs2_fs(void) unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); destroy_workqueue(gfs_recovery_wq); + destroy_workqueue(gfs2_control_wq); rcu_barrier(); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index fe72e79e6ff..b01573b7ad9 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -562,8 +562,12 @@ static void gfs2_others_may_mount(struct gfs2_sbd *sdp) { char *message = "FIRSTMOUNT=Done"; char *envp[] = { message, NULL }; - struct lm_lockstruct *ls = &sdp->sd_lockstruct; - ls->ls_first_done = 1; + + fs_info(sdp, "first mount done, others may mount\n"); + + if (sdp->sd_lockstruct.ls_ops->lm_first_done) + sdp->sd_lockstruct.ls_ops->lm_first_done(sdp); + kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); } @@ -944,7 +948,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) struct gfs2_args *args = &sdp->sd_args; const char *proto = sdp->sd_proto_name; const char *table = sdp->sd_table_name; - const char *fsname; char *o, *options; int ret; @@ -1004,21 +1007,12 @@ hostdata_error: } 
} - if (sdp->sd_args.ar_spectator) - snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table); - else - snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table, - sdp->sd_lockstruct.ls_jid); - - fsname = strchr(table, ':'); - if (fsname) - fsname++; if (lm->lm_mount == NULL) { fs_info(sdp, "Now mounting FS...\n"); complete_all(&sdp->sd_locking_init); return 0; } - ret = lm->lm_mount(sdp, fsname); + ret = lm->lm_mount(sdp, table); if (ret == 0) fs_info(sdp, "Joined cluster. Now mounting FS...\n"); complete_all(&sdp->sd_locking_init); @@ -1124,6 +1118,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (error) goto fail; + snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name); + gfs2_create_debugfs_file(sdp); error = gfs2_sys_fs_add(sdp); @@ -1160,6 +1156,13 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent goto fail_sb; } + if (sdp->sd_args.ar_spectator) + snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", + sdp->sd_table_name); + else + snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", + sdp->sd_table_name, sdp->sd_lockstruct.ls_jid); + error = init_inodes(sdp, DO); if (error) goto fail_sb; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index f2a02edcac8..af49e8f432f 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -436,12 +436,16 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, char env_status[20]; char *envp[] = { env_jid, env_status, NULL }; struct lm_lockstruct *ls = &sdp->sd_lockstruct; + ls->ls_recover_jid_done = jid; ls->ls_recover_jid_status = message; sprintf(env_jid, "JID=%d", jid); sprintf(env_status, "RECOVERY=%s", message == LM_RD_SUCCESS ? "Done" : "Failed"); kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); + + if (sdp->sd_lockstruct.ls_ops->lm_recovery_result) + sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message); } void gfs2_recover_func(struct work_struct *work) diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 443cabcfcd2..d33172c291b 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -298,7 +298,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf) ssize_t ret; int val = 0; - if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags)) + if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags)) val = 1; ret = sprintf(buf, "%d\n", val); return ret; @@ -313,9 +313,9 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) val = simple_strtol(buf, NULL, 0); if (val == 1) - set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags); + set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); else if (val == 0) { - clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags); + clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); smp_mb__after_clear_bit(); gfs2_glock_thaw(sdp); } else { @@ -350,8 +350,8 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len) goto out; if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) goto out; - sdp->sd_lockstruct.ls_first = first; - rv = 0; + sdp->sd_lockstruct.ls_first = first; + rv = 0; out: spin_unlock(&sdp->sd_jindex_spin); return rv ? 
rv : len; @@ -360,19 +360,14 @@ out: static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_first_done); + return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags)); } -static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) { - unsigned jid; struct gfs2_jdesc *jd; int rv; - rv = sscanf(buf, "%u", &jid); - if (rv != 1) - return -EINVAL; - rv = -ESHUTDOWN; spin_lock(&sdp->sd_jindex_spin); if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) @@ -389,6 +384,20 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) } out: spin_unlock(&sdp->sd_jindex_spin); + return rv; +} + +static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +{ + unsigned jid; + int rv; + + rv = sscanf(buf, "%u", &jid); + if (rv != 1) + return -EINVAL; + + rv = gfs2_recover_set(sdp, jid); + return rv ? rv : len; } diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h index e94560e836d..79182d6ad6a 100644 --- a/fs/gfs2/sys.h +++ b/fs/gfs2/sys.h @@ -19,5 +19,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp); int gfs2_sys_init(void); void gfs2_sys_uninit(void); +int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid); + #endif /* __SYS_DOT_H__ */ diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 4f4462974c1..b148087f49a 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -22,6 +22,8 @@ #define GFS2_LIVE_LOCK 1 #define GFS2_TRANS_LOCK 2 #define GFS2_RENAME_LOCK 3 +#define GFS2_CONTROL_LOCK 4 +#define GFS2_MOUNTED_LOCK 5 /* Format numbers for various metadata types */ -- cgit v1.2.3 From 49528b4e479195e5db4fe51fcd5ddd97901efc16 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 6 Jan 2012 16:48:50 -0500 Subject: GFS2: Fix a use-after-free that coverity spotted In function gfs2_inplace_release it was trying to unlock a gfs2_holder structure associated with a reservation, after said reservation was freed. The problem is that the statements have the wrong order. This patch corrects the order so that the reservation is freed after the gfs2_holder is unlocked. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 22234627f68..981bfa32121 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1108,9 +1108,9 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = ip->i_res; - gfs2_blkrsv_put(ip); if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); + gfs2_blkrsv_put(ip); } /** -- cgit v1.2.3 From e8ca5cc571a60339491f8c273a01093096ff8704 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 9 Jan 2012 14:40:06 -0500 Subject: GFS2: let spectator mount do read only recovery Previously, a spectator mount would not even attempt to do journal recovery for a failed node. This meant that if all mounted nodes were spectators, everyone would be stuck after a node failed, all waiting for recovery to be performed. This is unnecessary since the failed node had a clean journal. Instead, allow a spectator mount to do a partial "read only" recovery, which means it will check if the failed journal is clean, and if so, report a successful recovery. If the failed journal is not clean, it reports that journal recovery failed. 
This makes it work the same as a read only mount on a read only block device. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 + fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/recovery.c | 4 +++- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index b9422bc8e2f..e5701c70f6f 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -498,6 +498,7 @@ enum { SDF_NORECOVERY = 4, SDF_DEMOTE = 5, SDF_NOJOURNALID = 6, + SDF_RORECOVERY = 7, /* read only recovery */ }; #define GFS2_FSNAME_LEN 256 diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b01573b7ad9..6aacf3f230a 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1078,7 +1078,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (sdp->sd_args.ar_spectator) { sb->s_flags |= MS_RDONLY; - set_bit(SDF_NORECOVERY, &sdp->sd_flags); + set_bit(SDF_RORECOVERY, &sdp->sd_flags); } if (sdp->sd_args.ar_posix_acl) sb->s_flags |= MS_POSIXACL; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index af49e8f432f..80701d1566a 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -516,7 +516,9 @@ void gfs2_recover_func(struct work_struct *work) if (error) goto fail_gunlock_ji; - if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { + if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) { + ro = 1; + } else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) ro = 1; } else { -- cgit v1.2.3 From 376d37788b56bc2800e5bd56b7a36b3544d89f97 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 9 Jan 2012 15:29:20 -0500 Subject: GFS2: fail mount if journal recovery fails If the first mounter fails to recover one of the journals during mount, the mount should fail. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 + fs/gfs2/recovery.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index e5701c70f6f..97742a7ea9c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -429,6 +429,7 @@ struct gfs2_jdesc { #define JDF_RECOVERY 1 unsigned int jd_jid; unsigned int jd_blocks; + int jd_recover_error; }; struct gfs2_statfs_change_host { diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 80701d1566a..963b2d75200 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -583,6 +583,7 @@ fail_gunlock_j: fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); fail: + jd->jd_recover_error = error; gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); done: clear_bit(JDF_RECOVERY, &jd->jd_flags); @@ -611,6 +612,6 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); - return 0; + return wait ? jd->jd_recover_error : 0; } -- cgit v1.2.3 From 66ad863b410efb7f537719006f9ac52400c1a5c5 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 11 Jan 2012 12:35:05 +0000 Subject: GFS2: Fix nlink setting on inode creation Since the nlink count will be 0, we need to use set_nlink rather than inc_nlink in order to avoid triggering the inc_nlink warning which was added recently. 
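A minimal sketch of the resulting pattern (assuming a freshly created inode whose link count is still zero at this point):

	/*
	 * i_nlink is 0 on the new inode, so inc_nlink() would trigger the
	 * recently added warning; set the count explicitly instead:
	 * 2 for a directory ("." plus the entry in the parent), 1 otherwise.
	 */
	set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);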
Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 017960cf1d7..a7d611b93f0 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -599,9 +599,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto fail_end_trans; - inc_nlink(&ip->i_inode); - if (S_ISDIR(ip->i_inode.i_mode)) - inc_nlink(&ip->i_inode); + set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); -- cgit v1.2.3 From 353b67d8ced4dc53281c88150ad295e24bc4b4c5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 26 Nov 2011 00:35:39 +0100 Subject: jbd: Issue cache flush after checkpointing When we reach cleanup_journal_tail(), there is no guarantee that checkpointed buffers are on a stable storage - especially if buffers were written out by log_do_checkpoint(), they are likely to be only in disk's caches. Thus when we update journal superblock, effectively removing old transaction from journal, this write of superblock can get to stable storage before those checkpointed buffers which can result in filesystem corruption after a crash. A similar problem can happen if we replay the journal and wipe it before flushing disk's caches. Thus we must unconditionally issue a cache flush before we update journal superblock in these cases. The fix is slightly complicated by the fact that we have to get log tail before we issue cache flush but we can store it in the journal superblock only after the cache flush. Otherwise we risk races where new tail is written before appropriate cache flush is finished. I managed to reproduce the corruption using somewhat tweaked Chris Mason's barrier-test scheduler. Also this should fix occasional reports of 'Bit already freed' filesystem errors which are totally unreproducible but inspection of several fs images I've gathered over time points to a problem like this. CC: stable@kernel.org Signed-off-by: Jan Kara --- fs/jbd/checkpoint.c | 27 ++++++++++++++++++++++----- fs/jbd/recovery.c | 4 ++++ 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 5d1a00a5041..05f0754f2b4 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -453,8 +453,6 @@ out: * * Return <0 on error, 0 on success, 1 if there was nothing to clean up. * - * Called with the journal lock held. - * * This is the only part of the journaling code which really needs to be * aware of transaction aborts. Checkpointing involves writing to the * main filesystem area rather than to the journal, so it can proceed @@ -472,13 +470,14 @@ int cleanup_journal_tail(journal_t *journal) if (is_journal_aborted(journal)) return 1; - /* OK, work out the oldest transaction remaining in the log, and + /* + * OK, work out the oldest transaction remaining in the log, and * the log block it starts at. * * If the log is now empty, we need to work out which is the * next transaction ID we will write, and where it will - * start. */ - + * start. 
+ */ spin_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); transaction = journal->j_checkpoint_transactions; @@ -504,7 +503,25 @@ int cleanup_journal_tail(journal_t *journal) spin_unlock(&journal->j_state_lock); return 1; } + spin_unlock(&journal->j_state_lock); + + /* + * We need to make sure that any blocks that were recently written out + * --- perhaps by log_do_checkpoint() --- are flushed out before we + * drop the transactions from the journal. It's unlikely this will be + * necessary, especially with an appropriately sized journal, but we + * need this to guarantee correctness. Fortunately + * cleanup_journal_tail() doesn't get called all that often. + */ + if (journal->j_flags & JFS_BARRIER) + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + spin_lock(&journal->j_state_lock); + if (!tid_gt(first_tid, journal->j_tail_sequence)) { + spin_unlock(&journal->j_state_lock); + /* Someone else cleaned up journal so return 0 */ + return 0; + } /* OK, update the superblock to recover the freed space. * Physical blocks come first: have we wrapped beyond the end of * the log? */ diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 5b43e96788e..008bf062fd2 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -20,6 +20,7 @@ #include #include #include +#include #endif /* @@ -263,6 +264,9 @@ int journal_recover(journal_t *journal) err2 = sync_blockdev(journal->j_fs_dev); if (!err) err = err2; + /* Flush disk caches to get replayed data on the permanent storage */ + if (journal->j_flags & JFS_BARRIER) + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); return err; } -- cgit v1.2.3 From 34b07840565004cfa444e165e88bf77a5cbcdb25 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Mon, 9 Jan 2012 15:58:37 +0100 Subject: ext2: protect inode changes in the SETVERSION and SETFLAGS ioctls Unlock mutex after i_flags and i_ctime updates in the EXT2_IOC_SETFLAGS ioctl. Use i_mutex in the EXT2_IOC_SETVERSION ioctl to protect i_ctime and i_generation updates and make the ioctl consistent since i_mutex is also used in other places to protect timestamps and inode changes. 
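Condensed, the intended ordering inside each ioctl is roughly the following (a simplified sketch; the mnt_want_write_file()/mnt_drop_write_file() pairing and error handling are omitted):

	mutex_lock(&inode->i_mutex);
	/* EXT2_IOC_SETFLAGS updates ei->i_flags and calls ext2_set_inode_flags();
	 * EXT2_IOC_SETVERSION updates i_generation; both bump i_ctime. */
	inode->i_ctime = CURRENT_TIME_SEC;
	inode->i_generation = generation;
	mutex_unlock(&inode->i_mutex);

	mark_inode_dirty(inode);	/* dirty the inode only after i_mutex is dropped */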
Cc: Andreas Dilger Cc: Jan Kara Signed-off-by: Djalal Harouni Signed-off-by: Jan Kara --- fs/ext2/ioctl.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 1089f760c84..2de655f5d62 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -77,10 +77,11 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) flags = flags & EXT2_FL_USER_MODIFIABLE; flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; ei->i_flags = flags; - mutex_unlock(&inode->i_mutex); ext2_set_inode_flags(inode); inode->i_ctime = CURRENT_TIME_SEC; + mutex_unlock(&inode->i_mutex); + mark_inode_dirty(inode); setflags_out: mnt_drop_write_file(filp); @@ -88,20 +89,29 @@ setflags_out: } case EXT2_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *) arg); - case EXT2_IOC_SETVERSION: + case EXT2_IOC_SETVERSION: { + __u32 generation; + if (!inode_owner_or_capable(inode)) return -EPERM; ret = mnt_want_write_file(filp); if (ret) return ret; - if (get_user(inode->i_generation, (int __user *) arg)) { + if (get_user(generation, (int __user *) arg)) { ret = -EFAULT; - } else { - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); + goto setversion_out; } + + mutex_lock(&inode->i_mutex); + inode->i_ctime = CURRENT_TIME_SEC; + inode->i_generation = generation; + mutex_unlock(&inode->i_mutex); + + mark_inode_dirty(inode); +setversion_out: mnt_drop_write_file(filp); return ret; + } case EXT2_IOC_GETRSVSZ: if (test_opt(inode->i_sb, RESERVATION) && S_ISREG(inode->i_mode) -- cgit v1.2.3 From 1f5d78dc4823a85f112aaa2d0f17624f8c2a6c52 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 11 Jan 2012 15:13:27 +0200 Subject: UBIFS: make debugging messages light again We switch to dynamic debugging in commit 56e46742e846e4de167dde0e1e1071ace1c882a5 but did not take into account that now we do not control anymore whether a specific message is enabled or not. So now we lock the "dbg_lock" and release it in every debugging macro, which make them not so light-weight. This commit removes the "dbg_lock" protection from the debugging macros to fix the issue. The downside is that now our DBGKEY() stuff is broken, but this is not critical at all and will be fixed later. Signed-off-by: Artem Bityutskiy Cc: stable@kernel.org [3.0+] --- fs/ubifs/debug.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 8d9c4681018..164fd48c583 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -175,19 +175,17 @@ const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key); /* - * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message - * macros. + * TODO: these macros are now broken because there is no locking around them + * and we use a global buffer for the key string. This means that in case of + * concurrent execution we will end up with incorrect and messy key strings. */ #define DBGKEY(key) dbg_key_str0(c, (key)) #define DBGKEY1(key) dbg_key_str1(c, (key)) extern spinlock_t dbg_lock; -#define ubifs_dbg_msg(type, fmt, ...) do { \ - spin_lock(&dbg_lock); \ - pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ - spin_unlock(&dbg_lock); \ -} while (0) +#define ubifs_dbg_msg(type, fmt, ...) \ + pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) /* Just a debugging messages not related to any specific UBIFS subsystem */ #define dbg_msg(fmt, ...) 
ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__) -- cgit v1.2.3 From d34315da9146253351146140ea4b277193ee5e5f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 10 Jan 2012 19:32:30 +0200 Subject: UBIFS: fix debugging messages Patch 56e46742e846e4de167dde0e1e1071ace1c882a5 broke UBIFS debugging messages: before that commit when UBIFS debugging was enabled, users saw few useful debugging messages after mount. However, that patch turned 'dbg_msg()' into 'pr_debug()', so to enable the debugging messages users have to enable them first via /sys/kernel/debug/dynamic_debug/control, which is very impractical. This commit makes 'dbg_msg()' to use 'printk()' instead of 'pr_debug()', just as it was before the breakage. Signed-off-by: Artem Bityutskiy Cc: stable@kernel.org [3.0+] --- fs/ubifs/debug.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 164fd48c583..c9d294111a5 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -188,7 +188,10 @@ extern spinlock_t dbg_lock; pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) /* Just a debugging messages not related to any specific UBIFS subsystem */ -#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__) +#define dbg_msg(fmt, ...) \ + printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ + __func__, ##__VA_ARGS__) + /* General messages */ #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) /* Additional journal messages */ -- cgit v1.2.3 From d46cfba5363a163851dc768f717f34185527a472 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 4 Jan 2012 16:30:15 -0600 Subject: ceph: always initialize the dentry in open_root_dentry() When open_root_dentry() gets a dentry via d_obtain_alias() it does not get initialized. If the dentry obtained came from the cache, this is OK. But if not, the result is an improperly initialized dentry. To fix this, call ceph_init_dentry() regardless of which path produced the dentry. That function returns immediately for a dentry that is already initialized, it is safe to use either way. (Credit to Sage, who suggested this fix.) Signed-off-by: Alex Elder --- fs/ceph/super.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b48f15f101a..ec74313e901 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -638,12 +638,11 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, if (err == 0) { dout("open_root_inode success\n"); if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && - fsc->sb->s_root == NULL) { + fsc->sb->s_root == NULL) root = d_alloc_root(req->r_target_inode); - ceph_init_dentry(root); - } else { + else root = d_obtain_alias(req->r_target_inode); - } + ceph_init_dentry(root); req->r_target_inode = NULL; dout("open_root_inode success, root dentry is %p\n", root); } else { -- cgit v1.2.3 From 0b4156eb27214e81f7012458bb15d1e038db9a00 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 12 Jan 2012 09:11:56 +0100 Subject: fs: remove unneeded plug in mpage_readpages() The block plug in mpage_readpages() duplicates the one in read_pages(). 
Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- fs/mpage.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/mpage.c b/fs/mpage.c index fdfae9fa98c..643e9f55ef2 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -371,9 +371,6 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, sector_t last_block_in_bio = 0; struct buffer_head map_bh; unsigned long first_logical_block = 0; - struct blk_plug plug; - - blk_start_plug(&plug); map_bh.b_state = 0; map_bh.b_size = 0; @@ -395,7 +392,6 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, BUG_ON(!list_empty(pages)); if (bio) mpage_bio_submit(READ, bio); - blk_finish_plug(&plug); return 0; } EXPORT_SYMBOL(mpage_readpages); -- cgit v1.2.3 From 46fe44ce8777f087aa8ad4a2605fdcfb9c2d63af Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Nov 2011 15:03:59 +0100 Subject: quota: Pass information that quota is stored in system file to userspace Quota tools need to know whether quota is stored in a system file or in classical aquota.{user|group} files. So pass this information as a flag in GETINFO quotactl. Signed-off-by: Jan Kara --- fs/quota/dquot.c | 8 +++++--- include/linux/quota.h | 6 +++++- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 5ec59b20cf7..46741970371 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2125,6 +2125,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, mutex_unlock(&dqopt->dqio_mutex); goto out_file_init; } + if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) + dqopt->info[type].dqi_flags |= DQF_SYS_FILE; mutex_unlock(&dqopt->dqio_mutex); spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(flags, type); @@ -2464,7 +2466,7 @@ int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) spin_lock(&dq_data_lock); ii->dqi_bgrace = mi->dqi_bgrace; ii->dqi_igrace = mi->dqi_igrace; - ii->dqi_flags = mi->dqi_flags & DQF_MASK; + ii->dqi_flags = mi->dqi_flags & DQF_GETINFO_MASK; ii->dqi_valid = IIF_ALL; spin_unlock(&dq_data_lock); mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); @@ -2490,8 +2492,8 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) if (ii->dqi_valid & IIF_IGRACE) mi->dqi_igrace = ii->dqi_igrace; if (ii->dqi_valid & IIF_FLAGS) - mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | - (ii->dqi_flags & DQF_MASK); + mi->dqi_flags = (mi->dqi_flags & ~DQF_SETINFO_MASK) | + (ii->dqi_flags & DQF_SETINFO_MASK); spin_unlock(&dq_data_lock); mark_info_dirty(sb, type); /* Force write to disk */ diff --git a/include/linux/quota.h b/include/linux/quota.h index cb785569903..c09fa042b5e 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -230,7 +230,11 @@ struct mem_dqinfo { struct super_block; #define DQF_MASK 0xffff /* Mask for format specific flags */ -#define DQF_INFO_DIRTY_B 16 +#define DQF_GETINFO_MASK 0x1ffff /* Mask for flags passed to userspace */ +#define DQF_SETINFO_MASK 0xffff /* Mask for flags modifiable from userspace */ +#define DQF_SYS_FILE_B 16 +#define DQF_SYS_FILE (1 << DQF_SYS_FILE_B) /* Quota file stored as system file */ +#define DQF_INFO_DIRTY_B 31 #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? 
*/ extern void mark_info_dirty(struct super_block *sb, int type); -- cgit v1.2.3 From 46f72b349290d2bd7aecea38f02609d814332df6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Jan 2012 09:04:37 -0800 Subject: vfs: export symbol d_find_any_alias() Ceph needs this. Reviewed-by: Christoph Hellwig Signed-off-by: Sage Weil --- fs/dcache.c | 11 +++++++++-- include/linux/dcache.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 89509b5a090..ba960051dfb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1471,7 +1471,14 @@ static struct dentry * __d_find_any_alias(struct inode *inode) return alias; } -static struct dentry * d_find_any_alias(struct inode *inode) +/** + * d_find_any_alias - find any alias for a given inode + * @inode: inode to find an alias for + * + * If any aliases exist for the given inode, take and return a + * reference for one of them. If no aliases exist, return %NULL. + */ +struct dentry *d_find_any_alias(struct inode *inode) { struct dentry *de; @@ -1480,7 +1487,7 @@ static struct dentry * d_find_any_alias(struct inode *inode) spin_unlock(&inode->i_lock); return de; } - +EXPORT_SYMBOL(d_find_any_alias); /** * d_obtain_alias - find or allocate a dentry for a given inode diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ed9f74f6c51..3871ba743b4 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -241,6 +241,7 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); +extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); -- cgit v1.2.3 From a40dc6cc2e121abcbd1b22583ef5447763df510c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Jan 2012 09:12:55 -0800 Subject: ceph: enable/disable dentry complete flags via mount option Enable/disable use of the dentry dir 'complete' flag via a mount option. This lets the admin control whether ceph uses the dcache to satisfy negative lookups or readdir when it has the entire directory contents in its cache. This is purely a performance optimization; correctness is guaranteed whether it is enabled or not. Reviewed-by: Christoph Hellwig Signed-off-by: Sage Weil --- Documentation/filesystems/ceph.txt | 18 +++++++++++++----- fs/ceph/dir.c | 25 ++++++++++++++++++++++--- fs/ceph/super.c | 14 ++++++++++++++ fs/ceph/super.h | 1 + 4 files changed, 50 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt index 763d8ebbbeb..d6030aa3337 100644 --- a/Documentation/filesystems/ceph.txt +++ b/Documentation/filesystems/ceph.txt @@ -119,12 +119,20 @@ Mount Options must rely on TCP's error correction to detect data corruption in the data payload. - noasyncreaddir - Disable client's use its local cache to satisfy readdir - requests. (This does not change correctness; the client uses - cached metadata only when a lease or capability ensures it is - valid.) + dcache + Use the dcache contents to perform negative lookups and + readdir when the client has the entire directory contents in + its cache. 
(This does not change correctness; the client uses + cached metadata only when a lease or capability ensures it is + valid.) + + nodcache + Do not use the dcache as above. This avoids a significant amount of + complex code, sacrificing performance without affecting correctness, + and is useful for tracking down bugs. + noasyncreaddir + Do not use the dcache as above for readdir. More Information ================ diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 974ef1e4d26..5259abfb5dd 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1094,17 +1094,36 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry, */ void ceph_dir_set_complete(struct inode *inode) { - /* not yet implemented */ + struct dentry *dentry = d_find_any_alias(inode); + + if (dentry && ceph_dentry(dentry) && + ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) { + dout(" marking %p (%p) complete\n", inode, dentry); + set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); + } + dput(dentry); } void ceph_dir_clear_complete(struct inode *inode) { - /* not yet implemented */ + struct dentry *dentry = d_find_any_alias(inode); + + if (dentry && ceph_dentry(dentry)) { + dout(" marking %p (%p) complete\n", inode, dentry); + set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); + } + dput(dentry); } bool ceph_dir_test_complete(struct inode *inode) { - /* not yet implemented */ + struct dentry *dentry = d_find_any_alias(inode); + + if (dentry && ceph_dentry(dentry)) { + dout(" marking %p (%p) NOT complete\n", inode, dentry); + clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); + } + dput(dentry); return false; } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index ec74313e901..9c62fe02ce0 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -131,6 +131,8 @@ enum { Opt_rbytes, Opt_norbytes, Opt_noasyncreaddir, + Opt_dcache, + Opt_nodcache, Opt_ino32, }; @@ -152,6 +154,8 @@ static match_table_t fsopt_tokens = { {Opt_rbytes, "rbytes"}, {Opt_norbytes, "norbytes"}, {Opt_noasyncreaddir, "noasyncreaddir"}, + {Opt_dcache, "dcache"}, + {Opt_nodcache, "nodcache"}, {Opt_ino32, "ino32"}, {-1, NULL} }; @@ -231,6 +235,12 @@ static int parse_fsopt_token(char *c, void *private) case Opt_noasyncreaddir: fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; break; + case Opt_dcache: + fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; + break; + case Opt_nodcache: + fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; + break; case Opt_ino32: fsopt->flags |= CEPH_MOUNT_OPT_INO32; break; @@ -377,6 +387,10 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, ",norbytes"); if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) seq_puts(m, ",noasyncreaddir"); + if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE) + seq_puts(m, ",dcache"); + else + seq_puts(m, ",nodcache"); if (fsopt->wsize) seq_printf(m, ",wsize=%d", fsopt->wsize); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index edcbf3774a5..140f99f978c 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -28,6 +28,7 @@ #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ #define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ +#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) -- cgit v1.2.3 From 83eb26af0db71f2dfe551405c55d982288fa6178 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 11 Jan 2012 17:41:01 -0800 Subject: ceph: ensure prealloc_blob is in place when removing xattr In __ceph_build_xattrs_blob(), if a ceph inode's 
extended attributes are marked dirty, all attributes recorded in its rb_tree index are formatted into a "blob" buffer. The target buffer is recorded in ceph_inode->i_xattrs.prealloc_blob, and it is expected to exist and be of sufficient size to hold the attributes. The extended attributes are marked dirty in two cases: when a new attribute is added to the inode; or when one is removed. In the former case work is done to ensure the prealloc_blob buffer is properly set up, but in the latter it is not. Change the logic in ceph_removexattr() so it matches what is done in ceph_setxattr(). Note that this is done in a way that keeps the two blocks of code nearly identical, in anticipation of a subsequent patch that encapsulates some of this logic into one or more helper routines. Signed-off-by: Alex Elder Signed-off-by: Sage Weil --- fs/ceph/xattr.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index a5e36e4488a..857214ae8c0 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -818,6 +818,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name) struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); int issued; int err; + int required_blob_size; int dirty; if (ceph_snap(inode) != CEPH_NOSNAP) @@ -833,14 +834,34 @@ int ceph_removexattr(struct dentry *dentry, const char *name) return -EOPNOTSUPP; } + err = -ENOMEM; spin_lock(&ci->i_ceph_lock); __build_xattrs(inode); +retry: issued = __ceph_caps_issued(ci, NULL); dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); if (!(issued & CEPH_CAP_XATTR_EXCL)) goto do_sync; + required_blob_size = __get_required_blob_size(ci, 0, 0); + + if (!ci->i_xattrs.prealloc_blob || + required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { + struct ceph_buffer *blob; + + spin_unlock(&ci->i_ceph_lock); + dout(" preaallocating new blob size=%d\n", required_blob_size); + blob = ceph_buffer_new(required_blob_size, GFP_NOFS); + if (!blob) + goto out; + spin_lock(&ci->i_ceph_lock); + if (ci->i_xattrs.prealloc_blob) + ceph_buffer_put(ci->i_xattrs.prealloc_blob); + ci->i_xattrs.prealloc_blob = blob; + goto retry; + } + err = __remove_xattr_by_name(ceph_inode(inode), name); dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); ci->i_xattrs.dirty = true; @@ -853,6 +874,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name) do_sync: spin_unlock(&ci->i_ceph_lock); err = ceph_send_removexattr(dentry, name); +out: return err; } -- cgit v1.2.3 From 7250170c9ed00f3b74b11b98afefab45020672dd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Jan 2012 10:46:27 +0300 Subject: cifs: integer overflow in parse_dacl() On 32 bit systems num_aces * sizeof(struct cifs_ace *) could overflow leading to a smaller ppace buffer than we expected. 
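The guard added for parse_dacl() is the standard check for multiplication overflow before an allocation: compare the count against ULONG_MAX divided by the element size instead of multiplying first. A self-contained userspace illustration of the same check (alloc_array() is an invented helper, not cifs code):

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Reject the element count if count * elem_size cannot be represented,
 * instead of letting the multiplication wrap and under-allocating. */
static void *alloc_array(unsigned long count, size_t elem_size)
{
    if (elem_size && count > ULONG_MAX / elem_size) {
        fprintf(stderr, "count %lu would overflow the allocation size\n",
                count);
        return NULL;
    }
    return malloc(count * elem_size);
}

int main(void)
{
    /* One element more than can ever fit: always rejected by the check,
     * whereas the unchecked multiply would wrap to a tiny size. */
    unsigned long count = ULONG_MAX / sizeof(void *) + 1;
    void *p = alloc_array(count, sizeof(void *));

    printf("allocation %s\n", p ? "succeeded" : "rejected");
    free(p);
    return 0;
}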
Signed-off-by: Dan Carpenter Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 72ddf23ef6f..c1b25448738 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -909,6 +909,8 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, umode_t group_mask = S_IRWXG; umode_t other_mask = S_IRWXU | S_IRWXG | S_IRWXO; + if (num_aces > ULONG_MAX / sizeof(struct cifs_ace *)) + return; ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), GFP_KERNEL); if (!ppace) { -- cgit v1.2.3 From 0e0243dc35a2349b3946e54f90e874be396fdb8b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 12 Jan 2012 10:06:05 +0300 Subject: NFS: add an endian notation This function returns a big endian value. The implementation in fs/nfs/callback_proc.c is declared with "__be32" but the .h file uses "unsigned" instead. It makes sparse complain: fs/nfs/callback_proc.c:232:8: error: symbol 'nfs4_callback_layoutrecall' redeclared with different type (originally declared at fs/nfs/callback.h:165) - different base types Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 07df5f1d85e..c89d3b9e483 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -162,7 +162,7 @@ struct cb_layoutrecallargs { }; }; -extern unsigned nfs4_callback_layoutrecall( +extern __be32 nfs4_callback_layoutrecall( struct cb_layoutrecallargs *args, void *dummy, struct cb_process_state *cps); -- cgit v1.2.3 From 13fff2f35fd21d69ee84ef6a78610420e1a42818 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 12 Jan 2012 10:07:35 +0300 Subject: NFS: cleanup endian type in decode_ds_addr() port is supposed to be a __be16 here. The existing code should work fine, but this is a cleanup. Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayoutdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index ed388aae968..8ae91908f5a 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -382,7 +382,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) { struct nfs4_pnfs_ds_addr *da = NULL; char *buf, *portstr; - u32 port; + __be16 port; int nlen, rlen; int tmp[2]; __be32 *p; -- cgit v1.2.3 From 363e0df057ea8da539645fe4c3c227e3d44054cc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 12 Jan 2012 10:16:14 +0300 Subject: nfs: check for integer overflow in decode_devicenotify_args() On 32 bit, if n is too large then "n * sizeof(*args->devs)" could overflow and args->devs would be smaller than expected. 
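The devicenotify bound check that follows applies the same overflow pattern as the cifs fix shown above. The other two cleanups here concern sparse endianness annotations (a __be32 return type, a __be16 port), which keep big-endian wire values in a distinct type from CPU-order integers. A small userspace analogue using htons()/ntohs() (the kernel types and sparse checking do not exist in userspace; this only demonstrates the byte-order difference those annotations guard against):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t host_port = 2049;              /* CPU byte order */
    uint16_t wire_port = htons(host_port);  /* big-endian, as __be16 would be */

    /* On a little-endian machine the raw wire value reinterpreted as a
     * host integer is wrong (264, not 2049); only the converted value
     * is meaningful.  sparse's __be16/__be32 annotations catch this
     * kind of mix-up at build time in the kernel. */
    printf("host: %u  raw wire value: %u  converted back: %u\n",
           (unsigned)host_port, (unsigned)wire_port,
           (unsigned)ntohs(wire_port));
    return 0;
}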
Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 726e59a9e50..d50b2742f23 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -305,6 +305,10 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, n = ntohl(*p++); if (n <= 0) goto out; + if (n > ULONG_MAX / sizeof(*args->devs)) { + status = htonl(NFS4ERR_BADXDR); + goto out; + } args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL); if (!args->devs) { -- cgit v1.2.3 From de040beccd52bb5fcac90031505384d037b1111c Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 10 Jan 2012 22:42:47 +0800 Subject: NFS4: fix compile warnings in nfs4proc.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit compile in nfs-for-3.3 branch shows following warnings. Fix it here. fs/nfs/nfs4proc.c: In function ‘__nfs4_get_acl_uncached’: fs/nfs/nfs4proc.c:3589: warning: format ‘%ld’ expects type ‘long int’, but argument 4 has type ‘size_t’ fs/nfs/nfs4proc.c:3589: warning: format ‘%ld’ expects type ‘long int’, but argument 6 has type ‘size_t’ Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 75366dc8968..f0c849c98fe 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3587,7 +3587,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu res.acl_flags |= NFS4_ACL_LEN_REQUEST; resp_buf = page_address(pages[0]); - dprintk("%s buf %p buflen %ld npages %d args.acl_len %ld\n", + dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); -- cgit v1.2.3 From 39e567ae36fe03c2b446e1b83ee3d39bea08f90b Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 12 Jan 2012 23:18:41 +0800 Subject: pnfsblock: acquire im_lock in _preload_range When calling _add_entry, we should take the im_lock to protect agains other modifiers. Cc: #3.1+ Signed-off-by: Peng Tao Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/extents.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 19fa7b0b8c0..c69682a4262 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -139,11 +139,13 @@ static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length) } /* Ensure that future operations on given range of tree will not malloc */ -static int _preload_range(struct my_tree *tree, u64 offset, u64 length) +static int _preload_range(struct pnfs_inval_markings *marks, + u64 offset, u64 length) { u64 start, end, s; int count, i, used = 0, status = -ENOMEM; struct pnfs_inval_tracking **storage; + struct my_tree *tree = &marks->im_tree; dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); start = normalize(offset, tree->mtt_step_size); @@ -161,12 +163,11 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length) goto out_cleanup; } - /* Now need lock - HOW??? */ - + spin_lock(&marks->im_lock); for (s = start; s < end; s += tree->mtt_step_size) used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); + spin_unlock(&marks->im_lock); - /* Unlock - HOW??? 
*/ status = 0; out_cleanup: @@ -286,7 +287,7 @@ int bl_mark_sectors_init(struct pnfs_inval_markings *marks, start = normalize(offset, marks->im_block_size); end = normalize_up(offset + length, marks->im_block_size); - if (_preload_range(&marks->im_tree, start, end - start)) + if (_preload_range(marks, start, end - start)) goto outerr; spin_lock(&marks->im_lock); -- cgit v1.2.3 From 82b906d6550ee5fe0d5553359b3c9692dd0aed31 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 12 Jan 2012 23:18:43 +0800 Subject: pnfsblock: set read/write tk_status to pnfs_error To pass the IO status to upper layer. Signed-off-by: Peng Tao Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 281ae95932c..06fe0802118 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -216,6 +216,7 @@ bl_end_par_io_read(void *data) { struct nfs_read_data *rdata = data; + rdata->task.tk_status = rdata->pnfs_error; INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); schedule_work(&rdata->task.u.tk_work); } @@ -405,7 +406,7 @@ static void bl_end_par_io_write(void *data) { struct nfs_write_data *wdata = data; - wdata->task.tk_status = 0; + wdata->task.tk_status = wdata->pnfs_error; wdata->verf.committed = NFS_FILE_SYNC; INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); schedule_work(&wdata->task.u.tk_work); -- cgit v1.2.3 From 57582b372f63d0f655b1a35b0d306d73d1a46775 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 12 Jan 2012 23:18:45 +0800 Subject: pnfsblock: clean up _add_entry It is wrong to kmalloc in _add_entry() as it is inside spinlock. memory should be already allocated _add_entry() is called. Signed-off-by: Peng Tao Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/extents.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index c69682a4262..369ef53e89e 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -110,13 +110,7 @@ static int _add_entry(struct my_tree *tree, u64 s, int32_t tag, return 0; } else { struct pnfs_inval_tracking *new; - if (storage) - new = storage; - else { - new = kmalloc(sizeof(*new), GFP_NOFS); - if (!new) - return -ENOMEM; - } + new = storage; new->it_sector = s; new->it_tags = (1 << tag); list_add(&new->it_link, &pos->it_link); -- cgit v1.2.3 From 93a3844ee0f843b05a1df4b52e1a19ff26b98d24 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 12 Jan 2012 23:18:47 +0800 Subject: pnfsblock: don't spinlock when freeing block_dev bl_free_block_dev() may sleep. We can not call it with spinlock held. Besides, there is no need to take bm_lock as we are last user freeing bm_devlist. 
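The free_blk_mountid() change just described (its diff follows) relies on two facts: teardown runs when nothing else can reach bm_devlist, so no lock is needed, and the walk must cache the next pointer before each node is freed, which is exactly what list_for_each_entry_safe() provides. A stand-alone sketch of that teardown shape with an ordinary singly linked list (dev_node and free_devlist() are invented names, not pnfs code):

#include <stdio.h>
#include <stdlib.h>

/* Mirrors the shape of the patched teardown: no lock, and the next
 * pointer is cached before the current node is freed. */
struct dev_node {
    int id;
    struct dev_node *next;
};

static void free_devlist(struct dev_node *head)
{
    struct dev_node *cur, *next;

    for (cur = head; cur; cur = next) {
        next = cur->next;        /* cache before the node disappears */
        printf("releasing dev %d\n", cur->id);
        free(cur);               /* analogue of bl_free_block_dev() */
    }
}

int main(void)
{
    struct dev_node *head = NULL;

    for (int i = 0; i < 3; i++) {
        struct dev_node *n = malloc(sizeof(*n));

        if (!n)
            break;
        n->id = i;
        n->next = head;
        head = n;
    }
    free_devlist(head);
    return 0;
}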
Cc: #3.1+ Signed-off-by: Peng Tao Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 06fe0802118..6d39e9ab1e6 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -780,16 +780,13 @@ bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) static void free_blk_mountid(struct block_mount_id *mid) { if (mid) { - struct pnfs_block_dev *dev; - spin_lock(&mid->bm_lock); - while (!list_empty(&mid->bm_devlist)) { - dev = list_first_entry(&mid->bm_devlist, - struct pnfs_block_dev, - bm_node); + struct pnfs_block_dev *dev, *tmp; + + /* No need to take bm_lock as we are last user freeing bm_devlist */ + list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) { list_del(&dev->bm_node); bl_free_block_dev(dev); } - spin_unlock(&mid->bm_lock); kfree(mid); } } -- cgit v1.2.3 From 74a6eeb44ca6174d9cc93b9b8b4d58211c57bc80 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 12 Jan 2012 23:18:48 +0800 Subject: pnfsblock: limit bio page count One bio can have at most BIO_MAX_PAGES pages. We should limit it bec otherwise bio_alloc will fail when there are many pages in one read/write_pagelist. Cc: #3.1+ Signed-off-by: Peng Tao Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 6d39e9ab1e6..baf0bf2acbd 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -146,14 +146,19 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, { struct bio *bio; + npg = min(npg, BIO_MAX_PAGES); bio = bio_alloc(GFP_NOIO, npg); - if (!bio) - return NULL; + if (!bio && (current->flags & PF_MEMALLOC)) { + while (!bio && (npg /= 2)) + bio = bio_alloc(GFP_NOIO, npg); + } - bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; - bio->bi_bdev = be->be_mdev; - bio->bi_end_io = end_io; - bio->bi_private = par; + if (bio) { + bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; + bio->bi_bdev = be->be_mdev; + bio->bi_end_io = end_io; + bio->bi_private = par; + } return bio; } -- cgit v1.2.3 From 60c52e3a72fda10e82f38b6f979956eb2dcb3d4e Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 12 Jan 2012 23:18:40 +0800 Subject: pnfsblock: cleanup bl_mark_sectors_init It does not need to manipulate on partial initialized blocks. Writeback code takes care of it. 
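The "limit bio page count" patch above caps the request at BIO_MAX_PAGES and then halves it until bio_alloc() cooperates. The shape of that fallback is sketched below with plain malloc(); the PF_MEMALLOC condition and real bio sizing are kernel-specific and omitted, and MAX_SEGS is an arbitrary stand-in for BIO_MAX_PAGES:

#include <stdio.h>
#include <stdlib.h>

#define MAX_SEGS 256   /* arbitrary cap, standing in for BIO_MAX_PAGES */

/* Same fallback shape as the patched bl_alloc_init_bio(): cap the
 * request, try once, then halve until the allocator gives in.  With
 * plain malloc() the first attempt almost always succeeds, so the loop
 * is here to show the pattern rather than to exercise it. */
static void *alloc_batch(int *nsegs)
{
    int n = *nsegs;
    void *batch;

    if (n > MAX_SEGS)
        n = MAX_SEGS;

    batch = malloc((size_t)n * 64);
    while (!batch && (n /= 2))
        batch = malloc((size_t)n * 64);

    *nsegs = batch ? n : 0;
    return batch;
}

int main(void)
{
    int n = 1000;
    void *batch = alloc_batch(&n);

    printf("got a batch of %d segments\n", n);
    free(batch);
    return 0;
}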
Signed-off-by: Peng Tao Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 6 ++-- fs/nfs/blocklayout/blocklayout.h | 3 +- fs/nfs/blocklayout/extents.c | 76 +++------------------------------------- 3 files changed, 8 insertions(+), 77 deletions(-) (limited to 'fs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index baf0bf2acbd..a263810803c 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -577,8 +577,7 @@ fill_invalid_ext: unlock_page(page); ret = bl_mark_sectors_init(be->be_inval, isect, - PAGE_CACHE_SECTORS, - NULL); + PAGE_CACHE_SECTORS); if (unlikely(ret)) { dprintk("%s bl_mark_sectors_init fail %d\n", __func__, ret); @@ -627,8 +626,7 @@ next_page: } if (be->be_state == PNFS_BLOCK_INVALID_DATA) { ret = bl_mark_sectors_init(be->be_inval, isect, - PAGE_CACHE_SECTORS, - NULL); + PAGE_CACHE_SECTORS); if (unlikely(ret)) { dprintk("%s bl_mark_sectors_init fail %d\n", __func__, ret); diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 42acf7ef599..60728acc7b9 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -186,8 +186,7 @@ struct pnfs_block_extent * bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, struct pnfs_block_extent **cow_read); int bl_mark_sectors_init(struct pnfs_inval_markings *marks, - sector_t offset, sector_t length, - sector_t **pages); + sector_t offset, sector_t length); void bl_put_extent(struct pnfs_block_extent *be); struct pnfs_block_extent *bl_alloc_extent(void); int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 369ef53e89e..d0f52ed2242 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -174,33 +174,6 @@ static int _preload_range(struct pnfs_inval_markings *marks, return status; } -static void set_needs_init(sector_t *array, sector_t offset) -{ - sector_t *p = array; - - dprintk("%s enter\n", __func__); - if (!p) - return; - while (*p < offset) - p++; - if (*p == offset) - return; - else if (*p == ~0) { - *p++ = offset; - *p = ~0; - return; - } else { - sector_t *save = p; - dprintk("%s Adding %llu\n", __func__, (u64)offset); - while (*p != ~0) - p++; - p++; - memmove(save + 1, save, (char *)p - (char *)save); - *save = offset; - return; - } -} - /* We are relying on page lock to serialize this */ int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) { @@ -256,28 +229,15 @@ static int is_range_written(struct pnfs_inval_markings *marks, /* Marks sectors in [offest, offset_length) as having been initialized. * All lengths are step-aligned, where step is min(pagesize, blocksize). - * Notes where partial block is initialized, and helps prepare it for - * complete initialization later. 
+ * Currently assumes offset is page-aligned */ -/* Currently assumes offset is page-aligned */ int bl_mark_sectors_init(struct pnfs_inval_markings *marks, - sector_t offset, sector_t length, - sector_t **pages) + sector_t offset, sector_t length) { - sector_t s, start, end; - sector_t *array = NULL; /* Pages to mark */ + sector_t start, end; dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, (u64)offset, (u64)length); - s = max((sector_t) 3, - 2 * (marks->im_block_size / (PAGE_CACHE_SECTORS))); - dprintk("%s set max=%llu\n", __func__, (u64)s); - if (pages) { - array = kmalloc(s * sizeof(sector_t), GFP_NOFS); - if (!array) - goto outerr; - array[0] = ~0; - } start = normalize(offset, marks->im_block_size); end = normalize_up(offset + length, marks->im_block_size); @@ -285,41 +245,15 @@ int bl_mark_sectors_init(struct pnfs_inval_markings *marks, goto outerr; spin_lock(&marks->im_lock); - - for (s = normalize_up(start, PAGE_CACHE_SECTORS); - s < offset; s += PAGE_CACHE_SECTORS) { - dprintk("%s pre-area pages\n", __func__); - /* Portion of used block is not initialized */ - if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) - set_needs_init(array, s); - } if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) goto out_unlock; - for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS); - s < end; s += PAGE_CACHE_SECTORS) { - dprintk("%s post-area pages\n", __func__); - if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) - set_needs_init(array, s); - } - spin_unlock(&marks->im_lock); - if (pages) { - if (array[0] == ~0) { - kfree(array); - *pages = NULL; - } else - *pages = array; - } return 0; - out_unlock: +out_unlock: spin_unlock(&marks->im_lock); - outerr: - if (pages) { - kfree(array); - *pages = NULL; - } +outerr: return -ENOMEM; } -- cgit v1.2.3 From 72c508879979522de347bcec706507e00d7c443d Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 12 Jan 2012 23:18:42 +0800 Subject: pnfsblock: move find lock page logic out of bl_write_pagelist Also avoid unnecessary lock_page if page is handled by others. Signed-off-by: Peng Tao Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 78 +++++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index a263810803c..23427362185 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -490,6 +490,55 @@ cleanup: return ret; } +/* Find or create a zeroing page marked being writeback. + * Return ERR_PTR on error, NULL to indicate skip this page and page itself + * to indicate write out. 
+ */ +static struct page * +bl_find_get_zeroing_page(struct inode *inode, pgoff_t index, + struct pnfs_block_extent *cow_read) +{ + struct page *page; + int locked = 0; + page = find_get_page(inode->i_mapping, index); + if (page) + goto check_page; + + page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + if (unlikely(!page)) { + dprintk("%s oom\n", __func__); + return ERR_PTR(-ENOMEM); + } + locked = 1; + +check_page: + /* PageDirty: Other will write this out + * PageWriteback: Other is writing this out + * PageUptodate: It was read before + */ + if (PageDirty(page) || PageWriteback(page)) { + print_page(page); + if (locked) + unlock_page(page); + page_cache_release(page); + return NULL; + } + + if (!locked) { + lock_page(page); + locked = 1; + goto check_page; + } + if (!PageUptodate(page)) { + /* New page, readin or zero it */ + init_page_for_write(page, cow_read); + } + set_page_writeback(page); + unlock_page(page); + + return page; +} + static enum pnfs_try_status bl_write_pagelist(struct nfs_write_data *wdata, int sync) { @@ -549,32 +598,13 @@ fill_invalid_ext: dprintk("%s zero %dth page: index %lu isect %llu\n", __func__, npg_zero, index, (unsigned long long)isect); - page = - find_or_create_page(wdata->inode->i_mapping, index, - GFP_NOFS); - if (!page) { - dprintk("%s oom\n", __func__); - wdata->pnfs_error = -ENOMEM; + page = bl_find_get_zeroing_page(wdata->inode, index, + cow_read); + if (unlikely(IS_ERR(page))) { + wdata->pnfs_error = PTR_ERR(page); goto out; - } - - /* PageDirty: Other will write this out - * PageWriteback: Other is writing this out - * PageUptodate: It was read before - * sector_initialized: already written out - */ - if (PageDirty(page) || PageWriteback(page)) { - print_page(page); - unlock_page(page); - page_cache_release(page); + } else if (page == NULL) goto next_page; - } - if (!PageUptodate(page)) { - /* New page, readin or zero it */ - init_page_for_write(page, cow_read); - } - set_page_writeback(page); - unlock_page(page); ret = bl_mark_sectors_init(be->be_inval, isect, PAGE_CACHE_SECTORS); -- cgit v1.2.3 From c0411a94a8f318379464e29dd81db806249dbca6 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 12 Jan 2012 23:18:44 +0800 Subject: pnfsblock: remove rpc_call_ops from struct parallel_io block layout can just make use of generic read/write_done. Signed-off-by: Peng Tao Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 23427362185..9215c6644a3 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -90,7 +90,6 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect) */ struct parallel_io { struct kref refcnt; - struct rpc_call_ops call_ops; void (*pnfs_callback) (void *data); void *data; }; @@ -226,14 +225,6 @@ bl_end_par_io_read(void *data) schedule_work(&rdata->task.u.tk_work); } -/* We don't want normal .rpc_call_done callback used, so we replace it - * with this stub. 
- */ -static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata) -{ - return; -} - static enum pnfs_try_status bl_read_pagelist(struct nfs_read_data *rdata) { @@ -253,8 +244,6 @@ bl_read_pagelist(struct nfs_read_data *rdata) par = alloc_parallel(rdata); if (!par) goto use_mds; - par->call_ops = *rdata->mds_ops; - par->call_ops.rpc_call_done = bl_rpc_do_nothing; par->pnfs_callback = bl_end_par_io_read; /* At this point, we can no longer jump to use_mds */ @@ -564,8 +553,6 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) par = alloc_parallel(wdata); if (!par) return PNFS_NOT_ATTEMPTED; - par->call_ops = *wdata->mds_ops; - par->call_ops.rpc_call_done = bl_rpc_do_nothing; par->pnfs_callback = bl_end_par_io_write; /* At this point, have to be more careful with error handling */ -- cgit v1.2.3 From 7c5465d6ccd759caa959828e2add5603518dafc4 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 12 Jan 2012 23:18:46 +0800 Subject: pnfsblock: alloc short extent before submit bio As discussed earlier, it is better for block client to allocate memory for tracking extents state before submitting bio. So the patch does it by allocating a short_extent for every INVALID extent touched by write pagelist and for every zeroing page we created, saving them in layout header. Then in end_io we can just use them to create commit list items and avoid memory allocation there. Signed-off-by: Peng Tao Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 74 ++++++++++++++++++++++++++-------- fs/nfs/blocklayout/blocklayout.h | 9 ++++- fs/nfs/blocklayout/extents.c | 85 ++++++++++++++++++++++++++++++---------- 3 files changed, 131 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9215c6644a3..48cfac31f64 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect) */ struct parallel_io { struct kref refcnt; - void (*pnfs_callback) (void *data); + void (*pnfs_callback) (void *data, int num_se); void *data; + int bse_count; }; static inline struct parallel_io *alloc_parallel(void *data) @@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data) if (rv) { rv->data = data; kref_init(&rv->refcnt); + rv->bse_count = 0; } return rv; } @@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref) struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); dprintk("%s enter\n", __func__); - p->pnfs_callback(p->data); + p->pnfs_callback(p->data, p->bse_count); kfree(p); } @@ -216,7 +218,7 @@ static void bl_read_cleanup(struct work_struct *work) } static void -bl_end_par_io_read(void *data) +bl_end_par_io_read(void *data, int unused) { struct nfs_read_data *rdata = data; @@ -317,6 +319,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl, { sector_t isect, end; struct pnfs_block_extent *be; + struct pnfs_block_short_extent *se; dprintk("%s(%llu, %u)\n", __func__, offset, count); if (count == 0) @@ -329,8 +332,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl, be = bl_find_get_extent(bl, isect, NULL); BUG_ON(!be); /* FIXME */ len = min(end, be->be_f_offset + be->be_length) - isect; - if (be->be_state == PNFS_BLOCK_INVALID_DATA) - bl_mark_for_commit(be, isect, len); /* What if fails? 
*/ + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { + se = bl_pop_one_short_extent(be->be_inval); + BUG_ON(!se); + bl_mark_for_commit(be, isect, len, se); + } isect += len; bl_put_extent(be); } @@ -352,7 +358,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err) end_page_writeback(page); page_cache_release(page); } while (bvec >= bio->bi_io_vec); - if (!uptodate) { + + if (unlikely(!uptodate)) { if (!wdata->pnfs_error) wdata->pnfs_error = -EIO; pnfs_set_lo_fail(wdata->lseg); @@ -361,7 +368,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err) put_parallel(par); } -/* This is basically copied from mpage_end_io_read */ static void bl_end_io_write(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; @@ -387,7 +393,7 @@ static void bl_write_cleanup(struct work_struct *work) dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); wdata = container_of(task, struct nfs_write_data, task); - if (!wdata->pnfs_error) { + if (likely(!wdata->pnfs_error)) { /* Marks for LAYOUTCOMMIT */ mark_extents_written(BLK_LSEG2EXT(wdata->lseg), wdata->args.offset, wdata->args.count); @@ -396,10 +402,15 @@ static void bl_write_cleanup(struct work_struct *work) } /* Called when last of bios associated with a bl_write_pagelist call finishes */ -static void bl_end_par_io_write(void *data) +static void bl_end_par_io_write(void *data, int num_se) { struct nfs_write_data *wdata = data; + if (unlikely(wdata->pnfs_error)) { + bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, + num_se); + } + wdata->task.tk_status = wdata->pnfs_error; wdata->verf.committed = NFS_FILE_SYNC; INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); @@ -552,7 +563,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) */ par = alloc_parallel(wdata); if (!par) - return PNFS_NOT_ATTEMPTED; + goto out_mds; par->pnfs_callback = bl_end_par_io_write; /* At this point, have to be more careful with error handling */ @@ -560,12 +571,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); if (!be || !is_writable(be, isect)) { dprintk("%s no matching extents!\n", __func__); - wdata->pnfs_error = -EINVAL; - goto out; + goto out_mds; } /* First page inside INVALID extent */ if (be->be_state == PNFS_BLOCK_INVALID_DATA) { + if (likely(!bl_push_one_short_extent(be->be_inval))) + par->bse_count++; + else + goto out_mds; temp = offset >> PAGE_CACHE_SHIFT; npg_zero = do_div(temp, npg_per_block); isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & @@ -603,6 +617,19 @@ fill_invalid_ext: wdata->pnfs_error = ret; goto out; } + if (likely(!bl_push_one_short_extent(be->be_inval))) + par->bse_count++; + else { + end_page_writeback(page); + page_cache_release(page); + wdata->pnfs_error = -ENOMEM; + goto out; + } + /* FIXME: This should be done in bi_end_io */ + mark_extents_written(BLK_LSEG2EXT(wdata->lseg), + page->index << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE); + bio = bl_add_page_to_bio(bio, npg_zero, WRITE, isect, page, be, bl_end_io_write_zero, par); @@ -611,10 +638,6 @@ fill_invalid_ext: bio = NULL; goto out; } - /* FIXME: This should be done in bi_end_io */ - mark_extents_written(BLK_LSEG2EXT(wdata->lseg), - page->index << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE); next_page: isect += PAGE_CACHE_SECTORS; extent_length -= PAGE_CACHE_SECTORS; @@ -638,6 +661,15 @@ next_page: wdata->pnfs_error = -EINVAL; goto out; } + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { + if (likely(!bl_push_one_short_extent( + 
be->be_inval))) + par->bse_count++; + else { + wdata->pnfs_error = -ENOMEM; + goto out; + } + } extent_length = be->be_length - (isect - be->be_f_offset); } @@ -685,6 +717,10 @@ out: bl_submit_bio(WRITE, bio); put_parallel(par); return PNFS_ATTEMPTED; +out_mds: + bl_put_extent(be); + kfree(par); + return PNFS_NOT_ATTEMPTED; } /* FIXME - range ignored */ @@ -711,11 +747,17 @@ static void release_inval_marks(struct pnfs_inval_markings *marks) { struct pnfs_inval_tracking *pos, *temp; + struct pnfs_block_short_extent *se, *stemp; list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { list_del(&pos->it_link); kfree(pos); } + + list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) { + list_del(&se->bse_node); + kfree(se); + } return; } diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 60728acc7b9..e31a2df28e7 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -70,6 +70,7 @@ struct pnfs_inval_markings { spinlock_t im_lock; struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ sector_t im_block_size; /* Server blocksize in sectors */ + struct list_head im_extents; /* Short extents for INVAL->RW conversion */ }; struct pnfs_inval_tracking { @@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) { spin_lock_init(&marks->im_lock); INIT_LIST_HEAD(&marks->im_tree.mtt_stub); + INIT_LIST_HEAD(&marks->im_extents); marks->im_block_size = blocksize; marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, blocksize); @@ -199,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, int bl_add_merge_extent(struct pnfs_block_layout *bl, struct pnfs_block_extent *new); int bl_mark_for_commit(struct pnfs_block_extent *be, - sector_t offset, sector_t length); + sector_t offset, sector_t length, + struct pnfs_block_short_extent *new); +int bl_push_one_short_extent(struct pnfs_inval_markings *marks); +struct pnfs_block_short_extent * +bl_pop_one_short_extent(struct pnfs_inval_markings *marks); +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free); #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index d0f52ed2242..1abac09f7cd 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -157,10 +157,10 @@ static int _preload_range(struct pnfs_inval_markings *marks, goto out_cleanup; } - spin_lock(&marks->im_lock); + spin_lock_bh(&marks->im_lock); for (s = start; s < end; s += tree->mtt_step_size) used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); - spin_unlock(&marks->im_lock); + spin_unlock_bh(&marks->im_lock); status = 0; @@ -179,9 +179,9 @@ int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) { int rv; - spin_lock(&marks->im_lock); + spin_lock_bh(&marks->im_lock); rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); - spin_unlock(&marks->im_lock); + spin_unlock_bh(&marks->im_lock); return rv; } @@ -221,9 +221,9 @@ static int is_range_written(struct pnfs_inval_markings *marks, { int rv; - spin_lock(&marks->im_lock); + spin_lock_bh(&marks->im_lock); rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); - spin_unlock(&marks->im_lock); + spin_unlock_bh(&marks->im_lock); return rv; } @@ -244,15 +244,15 @@ int bl_mark_sectors_init(struct pnfs_inval_markings *marks, if (_preload_range(marks, start, end - start)) goto outerr; - spin_lock(&marks->im_lock); + spin_lock_bh(&marks->im_lock); if 
(_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) goto out_unlock; - spin_unlock(&marks->im_lock); + spin_unlock_bh(&marks->im_lock); return 0; out_unlock: - spin_unlock(&marks->im_lock); + spin_unlock_bh(&marks->im_lock); outerr: return -ENOMEM; } @@ -267,9 +267,9 @@ static int mark_written_sectors(struct pnfs_inval_markings *marks, dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, (u64)offset, (u64)length); - spin_lock(&marks->im_lock); + spin_lock_bh(&marks->im_lock); status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length); - spin_unlock(&marks->im_lock); + spin_unlock_bh(&marks->im_lock); return status; } @@ -369,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl, /* Note the range described by offset, length is guaranteed to be contained * within be. + * new will be freed, either by this function or add_to_commitlist if they + * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist. */ int bl_mark_for_commit(struct pnfs_block_extent *be, - sector_t offset, sector_t length) + sector_t offset, sector_t length, + struct pnfs_block_short_extent *new) { sector_t new_end, end = offset + length; - struct pnfs_block_short_extent *new; struct pnfs_block_layout *bl = container_of(be->be_inval, struct pnfs_block_layout, bl_inval); - new = kmalloc(sizeof(*new), GFP_NOFS); - if (!new) - return -ENOMEM; - mark_written_sectors(be->be_inval, offset, length); /* We want to add the range to commit list, but it must be * block-normalized, and verified that the normalized range has @@ -412,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be, new->bse_mdev = be->be_mdev; spin_lock(&bl->bl_ext_lock); - /* new will be freed, either by add_to_commitlist if it decides not - * to use it, or after LAYOUTCOMMIT uses it in the commitlist. - */ add_to_commitlist(bl, new); spin_unlock(&bl->bl_ext_lock); return 0; @@ -862,3 +857,53 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, } } } + +int bl_push_one_short_extent(struct pnfs_inval_markings *marks) +{ + struct pnfs_block_short_extent *new; + + new = kmalloc(sizeof(*new), GFP_NOFS); + if (unlikely(!new)) + return -ENOMEM; + + spin_lock_bh(&marks->im_lock); + list_add(&new->bse_node, &marks->im_extents); + spin_unlock_bh(&marks->im_lock); + + return 0; +} + +struct pnfs_block_short_extent * +bl_pop_one_short_extent(struct pnfs_inval_markings *marks) +{ + struct pnfs_block_short_extent *rv = NULL; + + spin_lock_bh(&marks->im_lock); + if (!list_empty(&marks->im_extents)) { + rv = list_entry((&marks->im_extents)->next, + struct pnfs_block_short_extent, bse_node); + list_del_init(&rv->bse_node); + } + spin_unlock_bh(&marks->im_lock); + + return rv; +} + +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free) +{ + struct pnfs_block_short_extent *se = NULL, *tmp; + + if (num_to_free <= 0) + return; + + spin_lock(&marks->im_lock); + list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) { + list_del(&se->bse_node); + kfree(se); + if (--num_to_free == 0) + break; + } + spin_unlock(&marks->im_lock); + + BUG_ON(num_to_free > 0); +} -- cgit v1.2.3 From 69116f279a9eaf4c540934269342d9149538fc79 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 13 Jan 2012 09:32:17 +1030 Subject: module_param: avoid bool abuse, add bint for special cases. For historical reasons, we allow module_param(bool) to take an int (or an unsigned int). That's going away. A few drivers really want an int: they set it to -1 and a parameter will set it to 0 or 1. 
This sucks: reading them from sysfs will give 'Y' for both -1 and 1, but if we change it to an int, then the users might be broken (if they did "param" instead of "param=1"). Use a new 'bint' parser for them. (ntfs has a different problem: it needs an int for debug_msgs because it's also exposed via sysctl.) Cc: Steve Glendinning Cc: Jean Delvare Cc: Guenter Roeck Cc: Hoang-Nam Nguyen Cc: Christoph Raisch Cc: Roland Dreier Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux390@de.ibm.com Cc: Anton Altaparmakov Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: lm-sensors@lm-sensors.org Cc: linux-rdma@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-ntfs-dev@lists.sourceforge.net Cc: alsa-devel@alsa-project.org Acked-by: Takashi Iwai (For the sound part) Acked-by: Guenter Roeck (For the hwmon driver) Signed-off-by: Rusty Russell --- drivers/hwmon/emc2103.c | 2 +- drivers/infiniband/hw/ehca/ehca_main.c | 2 +- drivers/s390/cio/cmf.c | 2 +- fs/ntfs/super.c | 2 +- include/linux/moduleparam.h | 6 ++++++ kernel/params.c | 24 ++++++++++++++++++++++++ sound/pci/intel8x0.c | 4 ++-- 7 files changed, 36 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/drivers/hwmon/emc2103.c b/drivers/hwmon/emc2103.c index 848a2b0bc83..865063914d7 100644 --- a/drivers/hwmon/emc2103.c +++ b/drivers/hwmon/emc2103.c @@ -55,7 +55,7 @@ static const u8 REG_TEMP_MAX[4] = { 0x34, 0x30, 0x31, 0x32 }; * it. Default is to leave the device in the state it's already in (-1). * This parameter allows APD mode to be optionally forced on or off */ static int apd = -1; -module_param(apd, bool, 0); +module_param(apd, bint, 0); MODULE_PARM_DESC(init, "Set to zero to disable anti-parallel diode mode"); struct temperature { diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index c240e9972cb..8af8d4f7bdb 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -82,7 +82,7 @@ module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); -module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); +module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 2985eb43948..204ca728e7f 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -98,7 +98,7 @@ enum cmb_format { * enum cmb_format. 
*/ static int format = CMF_AUTODETECT; -module_param(format, bool, 0444); +module_param(format, bint, 0444); /** * struct cmb_operations - functions to use depending on cmb_format diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 608be451609..5a4a8af5c40 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3198,7 +3198,7 @@ MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparm MODULE_VERSION(NTFS_VERSION); MODULE_LICENSE("GPL"); #ifdef DEBUG -module_param(debug_msgs, bool, 0); +module_param(debug_msgs, bint, 0); MODULE_PARM_DESC(debug_msgs, "Enable debug messages."); #endif diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 794d4b0f121..6bdde0c3bcc 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -367,6 +367,12 @@ extern int param_set_invbool(const char *val, const struct kernel_param *kp); extern int param_get_invbool(char *buffer, const struct kernel_param *kp); #define param_check_invbool(name, p) __param_check(name, p, bool) +/* An int, which can only be set like a bool (though it shows as an int). */ +extern struct kernel_param_ops param_ops_bint; +extern int param_set_bint(const char *val, const struct kernel_param *kp); +#define param_get_bint param_get_int +#define param_check_bint param_check_int + /** * module_param_array - a parameter which is an array of some type * @name: the name of the array variable diff --git a/kernel/params.c b/kernel/params.c index 9240664af11..32ee0430828 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -363,6 +363,30 @@ struct kernel_param_ops param_ops_invbool = { }; EXPORT_SYMBOL(param_ops_invbool); +int param_set_bint(const char *val, const struct kernel_param *kp) +{ + struct kernel_param boolkp; + bool v; + int ret; + + /* Match bool exactly, by re-using it. */ + boolkp = *kp; + boolkp.arg = &v; + boolkp.flags |= KPARAM_ISBOOL; + + ret = param_set_bool(val, &boolkp); + if (ret == 0) + *(int *)kp->arg = v; + return ret; +} +EXPORT_SYMBOL(param_set_bint); + +struct kernel_param_ops param_ops_bint = { + .set = param_set_bint, + .get = param_get_int, +}; +EXPORT_SYMBOL(param_ops_bint); + /* We break the rule and mangle the string. */ static int param_array(const char *name, const char *val, diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 40b181bab93..9f3b01bb72c 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -95,13 +95,13 @@ module_param(ac97_quirk, charp, 0444); MODULE_PARM_DESC(ac97_quirk, "AC'97 workaround for strange hardware."); module_param(buggy_semaphore, bool, 0444); MODULE_PARM_DESC(buggy_semaphore, "Enable workaround for hardwares with problematic codec semaphores."); -module_param(buggy_irq, bool, 0444); +module_param(buggy_irq, bint, 0444); MODULE_PARM_DESC(buggy_irq, "Enable workaround for buggy interrupts on some motherboards."); module_param(xbox, bool, 0444); MODULE_PARM_DESC(xbox, "Set to 1 for Xbox, if you have problems with the AC'97 codec detection."); module_param(spdif_aclink, int, 0444); MODULE_PARM_DESC(spdif_aclink, "S/PDIF over AC-link."); -module_param(inside_vm, bool, 0444); +module_param(inside_vm, bint, 0444); MODULE_PARM_DESC(inside_vm, "KVM/Parallels optimization."); /* just for backward compatibility */ -- cgit v1.2.3 From 90ab5ee94171b3e28de6bb42ee30b527014e0be7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 13 Jan 2012 09:32:20 +1030 Subject: module_param: make bool parameters really bool (drivers & misc) module_param(bool) used to counter-intuitively take an int. 
In fddd5201 (mid-2009) we allowed bool or int/unsigned int using a messy trick. It's time to remove the int/unsigned int option. For this version it'll simply give a warning, but it'll break next kernel version. Acked-by: Mauro Carvalho Chehab Signed-off-by: Rusty Russell --- drivers/accessibility/braille/braille_console.c | 2 +- drivers/acpi/acpica/acglobal.h | 2 +- drivers/acpi/apei/ghes.c | 2 +- drivers/acpi/apei/hest.c | 2 +- drivers/acpi/dock.c | 2 +- drivers/acpi/pci_slot.c | 2 +- drivers/acpi/video.c | 6 +++--- drivers/ata/sata_nv.c | 6 +++--- drivers/ata/sata_sil24.c | 2 +- drivers/atm/he.c | 6 +++--- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/paride/bpck6.c | 5 ++--- drivers/block/paride/pd.c | 3 ++- drivers/block/paride/pf.c | 4 +++- drivers/block/paride/pg.c | 3 ++- drivers/block/paride/pt.c | 4 +++- drivers/block/xd.c | 2 +- drivers/bluetooth/btusb.c | 12 ++++++------ drivers/bluetooth/hci_bcsp.c | 4 ++-- drivers/bluetooth/hci_ldisc.c | 2 +- drivers/cdrom/cdrom.c | 12 ++++++------ drivers/char/agp/amd64-agp.c | 2 +- drivers/char/agp/sis-agp.c | 2 +- drivers/char/i8k.c | 8 ++++---- drivers/char/ipmi/ipmi_si_intf.c | 2 +- drivers/char/lp.c | 2 +- drivers/char/nwflash.c | 2 +- drivers/char/pcmcia/synclink_cs.c | 2 +- drivers/char/random.c | 2 +- drivers/char/tpm/tpm_tis.c | 6 +++--- drivers/edac/r82600_edac.c | 2 +- drivers/firewire/sbp2.c | 2 +- drivers/hid/hid-prodikeys.c | 2 +- drivers/hwmon/abituguru.c | 2 +- drivers/hwmon/abituguru3.c | 4 ++-- drivers/hwmon/acpi_power_meter.c | 2 +- drivers/hwmon/adm1021.c | 2 +- drivers/hwmon/ads7828.c | 4 ++-- drivers/hwmon/dme1737.c | 4 ++-- drivers/hwmon/it87.c | 4 ++-- drivers/hwmon/lm93.c | 4 ++-- drivers/hwmon/max1668.c | 2 +- drivers/hwmon/w83627hf.c | 2 +- drivers/hwmon/w83781d.c | 4 ++-- drivers/hwmon/w83791d.c | 4 ++-- drivers/hwmon/w83792d.c | 2 +- drivers/hwmon/w83793.c | 2 +- drivers/hwmon/w83795.c | 2 +- drivers/hwmon/w83l786ng.c | 2 +- drivers/i2c/busses/i2c-highlander.c | 2 +- drivers/i2c/busses/i2c-ibm_iic.c | 4 ++-- drivers/i2c/busses/i2c-sis630.c | 4 ++-- drivers/i2c/busses/i2c-viapro.c | 2 +- drivers/ide/ali14xx.c | 2 +- drivers/ide/cmd640.c | 2 +- drivers/ide/dtc2278.c | 2 +- drivers/ide/gayle.c | 2 +- drivers/ide/ht6560b.c | 2 +- drivers/ide/ide-4drives.c | 2 +- drivers/ide/ide-acpi.c | 6 +++--- drivers/ide/ide-pci-generic.c | 2 +- drivers/ide/qd65xx.c | 2 +- drivers/ide/umc8672.c | 2 +- drivers/infiniband/hw/ehca/ehca_classes.h | 4 ++-- drivers/infiniband/hw/ehca/ehca_main.c | 8 ++++---- drivers/infiniband/hw/nes/nes.c | 2 +- drivers/input/joystick/xpad.c | 6 +++--- drivers/input/misc/wistron_btns.c | 2 +- drivers/input/mouse/psmouse-base.c | 2 +- drivers/input/mouse/synaptics_i2c.c | 6 +++--- drivers/input/serio/hp_sdc.c | 2 +- drivers/input/touchscreen/eeti_ts.c | 4 ++-- drivers/input/touchscreen/htcpen.c | 4 ++-- drivers/input/touchscreen/ucb1400_ts.c | 2 +- drivers/input/touchscreen/usbtouchscreen.c | 4 ++-- drivers/isdn/hardware/avm/b1dma.c | 2 +- drivers/isdn/hardware/avm/c4.c | 2 +- drivers/isdn/sc/init.c | 2 +- drivers/leds/leds-clevo-mail.c | 2 +- drivers/leds/leds-ss4200.c | 2 +- drivers/macintosh/ams/ams-core.c | 2 +- drivers/macintosh/ams/ams-input.c | 4 ++-- drivers/macintosh/therm_adt746x.c | 2 +- drivers/media/dvb/dvb-usb/af9005.c | 2 +- drivers/media/dvb/dvb-usb/af9005.h | 2 +- drivers/media/radio/radio-gemtek.c | 10 +++++----- drivers/media/radio/radio-miropcm20.c | 2 +- drivers/media/rc/lirc_dev.c | 2 +- drivers/media/rc/mceusb.c | 4 ++-- 
drivers/media/rc/streamzap.c | 4 ++-- drivers/media/rc/winbond-cir.c | 4 ++-- drivers/media/video/c-qcam.c | 2 +- drivers/media/video/cs5345.c | 2 +- drivers/media/video/cs53l32a.c | 2 +- drivers/media/video/cx18/cx18-driver.c | 2 +- drivers/media/video/cx25821/cx25821-alsa.c | 2 +- drivers/media/video/cx88/cx88-alsa.c | 2 +- drivers/media/video/gspca/m5602/m5602_core.c | 4 ++-- drivers/media/video/gspca/m5602/m5602_mt9m111.h | 2 +- drivers/media/video/gspca/m5602/m5602_ov7660.h | 2 +- drivers/media/video/gspca/m5602/m5602_ov9650.h | 2 +- drivers/media/video/gspca/m5602/m5602_po1030.h | 2 +- drivers/media/video/gspca/m5602/m5602_s5k4aa.h | 2 +- drivers/media/video/gspca/m5602/m5602_s5k83a.h | 2 +- drivers/media/video/gspca/stv06xx/stv06xx.c | 4 ++-- drivers/media/video/hdpvr/hdpvr-core.c | 2 +- drivers/media/video/ivtv/ivtv-driver.c | 2 +- drivers/media/video/ivtv/ivtvfb.c | 2 +- drivers/media/video/marvell-ccic/mcam-core.c | 6 +++--- drivers/media/video/msp3400-driver.c | 6 +++--- drivers/media/video/msp3400-driver.h | 6 +++--- drivers/media/video/omap/omap_vout.c | 6 +++--- drivers/media/video/omap/omap_vout_vrfb.c | 2 +- drivers/media/video/ov7670.c | 2 +- drivers/media/video/saa7115.c | 2 +- drivers/media/video/stk-webcam.c | 4 ++-- drivers/media/video/tm6000/tm6000-alsa.c | 2 +- drivers/media/video/tvp514x.c | 2 +- drivers/media/video/tvp7002.c | 2 +- drivers/media/video/upd64083.c | 2 +- drivers/media/video/via-camera.c | 4 ++-- drivers/media/video/zoran/zoran_device.c | 2 +- drivers/media/video/zoran/zr36060.c | 2 +- drivers/memstick/host/jmb38x_ms.c | 2 +- drivers/memstick/host/r592.c | 2 +- drivers/memstick/host/tifm_ms.c | 2 +- drivers/misc/iwmc3200top/main.c | 12 ++++++------ drivers/mmc/core/core.c | 6 +++--- drivers/mmc/core/core.h | 2 +- drivers/mmc/host/tifm_sd.c | 4 ++-- drivers/mmc/host/vub300.c | 10 +++++----- drivers/mtd/nand/pxa3xx_nand.c | 2 +- drivers/mtd/nand/r852.c | 2 +- drivers/parport/parport_ip32.c | 2 +- drivers/pci/hotplug/acpi_pcihp.c | 2 +- drivers/pci/hotplug/acpiphp_core.c | 2 +- drivers/pci/hotplug/acpiphp_ibm.c | 2 +- drivers/pci/hotplug/cpcihp_zt5550.c | 4 ++-- drivers/pci/hotplug/cpqphp_core.c | 4 ++-- drivers/pci/hotplug/ibmphp_core.c | 2 +- drivers/pci/hotplug/pci_hotplug_core.c | 2 +- drivers/pci/hotplug/pciehp.h | 6 +++--- drivers/pci/hotplug/pciehp_core.c | 6 +++--- drivers/pci/hotplug/pcihp_skeleton.c | 2 +- drivers/pci/hotplug/rpaphp.h | 2 +- drivers/pci/hotplug/rpaphp_core.c | 2 +- drivers/pci/hotplug/shpchp.h | 4 ++-- drivers/pci/hotplug/shpchp_core.c | 4 ++-- drivers/pci/pcie/aer/aer_inject.c | 2 +- drivers/pci/pcie/aer/aerdrv_core.c | 4 ++-- drivers/pcmcia/yenta_socket.c | 6 +++--- drivers/platform/x86/compal-laptop.c | 2 +- drivers/platform/x86/intel_oaktrail.c | 2 +- drivers/platform/x86/msi-laptop.c | 2 +- drivers/platform/x86/samsung-laptop.c | 4 ++-- drivers/platform/x86/thinkpad_acpi.c | 16 ++++++++-------- drivers/platform/x86/wmi.c | 4 ++-- drivers/power/ds2760_battery.c | 2 +- drivers/s390/char/raw3270.c | 2 +- drivers/s390/char/vmwatchdog.c | 4 ++-- drivers/scsi/aha1542.c | 2 +- drivers/scsi/dc395x.c | 2 +- drivers/scsi/nsp32.c | 4 ++-- drivers/scsi/pcmcia/nsp_cs.c | 2 +- drivers/staging/comedi/comedi_fops.c | 2 +- drivers/staging/comedi/comedi_fops.h | 3 ++- drivers/staging/media/go7007/snd-go7007.c | 2 +- drivers/staging/media/lirc/lirc_bt829.c | 2 +- drivers/staging/media/lirc/lirc_igorplugusb.c | 4 ++-- drivers/staging/media/lirc/lirc_parallel.c | 4 ++-- drivers/staging/media/lirc/lirc_serial.c | 10 +++++----- 
drivers/staging/media/lirc/lirc_sir.c | 2 +- drivers/staging/media/lirc/lirc_zilog.c | 4 ++-- drivers/staging/quatech_usb2/quatech_usb2.c | 2 +- drivers/staging/serqt_usb2/serqt_usb2.c | 2 +- drivers/staging/speakup/speakup.h | 2 +- drivers/staging/speakup/synth.c | 2 +- drivers/staging/vme/bridges/vme_tsi148.c | 2 +- drivers/tty/rocket.c | 2 +- drivers/tty/synclink.c | 2 +- drivers/tty/synclinkmp.c | 2 +- drivers/usb/atm/speedtch.c | 6 +++--- drivers/usb/atm/ueagle-atm.c | 2 +- drivers/usb/core/devio.c | 2 +- drivers/usb/core/hub.c | 8 ++++---- drivers/usb/core/usb.c | 2 +- drivers/usb/gadget/amd5536udc.c | 8 ++++---- drivers/usb/gadget/ether.c | 4 ++-- drivers/usb/gadget/file_storage.c | 10 +++++----- drivers/usb/gadget/net2272.c | 2 +- drivers/usb/gadget/net2280.c | 6 +++--- drivers/usb/gadget/omap_udc.c | 2 +- drivers/usb/gadget/pch_udc.c | 2 +- drivers/usb/gadget/serial.c | 4 ++-- drivers/usb/gadget/zero.c | 2 +- drivers/usb/host/ehci-hcd.c | 2 +- drivers/usb/host/ohci-hcd.c | 4 ++-- drivers/usb/host/oxu210hp-hcd.c | 2 +- drivers/usb/host/u132-hcd.c | 2 +- drivers/usb/host/uhci-hcd.c | 2 +- drivers/usb/misc/ftdi-elan.c | 2 +- drivers/usb/misc/iowarrior.c | 2 +- drivers/usb/musb/cppi_dma.c | 2 +- drivers/usb/musb/musb_core.c | 2 +- drivers/usb/serial/aircable.c | 2 +- drivers/usb/serial/ark3116.c | 2 +- drivers/usb/serial/belkin_sa.c | 2 +- drivers/usb/serial/ch341.c | 2 +- drivers/usb/serial/cp210x.c | 2 +- drivers/usb/serial/cyberjack.c | 2 +- drivers/usb/serial/cypress_m8.c | 6 +++--- drivers/usb/serial/digi_acceleport.c | 2 +- drivers/usb/serial/empeg.c | 2 +- drivers/usb/serial/ftdi_sio.c | 2 +- drivers/usb/serial/funsoft.c | 2 +- drivers/usb/serial/garmin_gps.c | 2 +- drivers/usb/serial/io_edgeport.c | 2 +- drivers/usb/serial/io_ti.c | 4 ++-- drivers/usb/serial/ipaq.c | 2 +- drivers/usb/serial/ipw.c | 2 +- drivers/usb/serial/ir-usb.c | 2 +- drivers/usb/serial/iuu_phoenix.c | 6 +++--- drivers/usb/serial/keyspan.c | 2 +- drivers/usb/serial/keyspan_pda.c | 2 +- drivers/usb/serial/kl5kusb105.c | 2 +- drivers/usb/serial/mct_u232.c | 2 +- drivers/usb/serial/mos7720.c | 2 +- drivers/usb/serial/mos7840.c | 2 +- drivers/usb/serial/navman.c | 2 +- drivers/usb/serial/omninet.c | 2 +- drivers/usb/serial/opticon.c | 2 +- drivers/usb/serial/option.c | 2 +- drivers/usb/serial/oti6858.c | 2 +- drivers/usb/serial/pl2303.c | 2 +- drivers/usb/serial/qcserial.c | 2 +- drivers/usb/serial/safe_serial.c | 6 +++--- drivers/usb/serial/sierra.c | 4 ++-- drivers/usb/serial/spcp8x5.c | 2 +- drivers/usb/serial/ssu100.c | 2 +- drivers/usb/serial/symbolserial.c | 2 +- drivers/usb/serial/ti_usb_3410_5052.c | 2 +- drivers/usb/serial/usb-serial.c | 2 +- drivers/usb/serial/usb_wwan.c | 2 +- drivers/usb/serial/visor.c | 2 +- drivers/usb/serial/whiteheat.c | 2 +- drivers/video/aty/atyfb_base.c | 4 ++-- drivers/video/aty/radeon_base.c | 18 +++++++++--------- drivers/video/cirrusfb.c | 2 +- drivers/video/hgafb.c | 2 +- drivers/video/intelfb/intelfbdrv.c | 16 ++++++++-------- drivers/video/logo/logo.c | 2 +- drivers/video/neofb.c | 10 +++++----- drivers/video/omap/omapfb_main.c | 4 ++-- drivers/video/omap2/dss/core.c | 2 +- drivers/video/omap2/dss/dsi.c | 4 ++-- drivers/video/omap2/dss/dss.h | 2 +- drivers/video/omap2/omapfb/omapfb-main.c | 8 ++++---- drivers/video/omap2/omapfb/omapfb.h | 2 +- drivers/video/pm2fb.c | 8 ++++---- drivers/video/pm3fb.c | 4 ++-- drivers/video/riva/fbdev.c | 6 +++--- drivers/video/smscufx.c | 4 ++-- drivers/video/sstfb.c | 6 +++--- drivers/video/tdfxfb.c | 2 +- drivers/video/udlfb.c | 6 
+++--- drivers/video/uvesafb.c | 6 +++--- drivers/video/vfb.c | 2 +- drivers/watchdog/f71808e_wdt.c | 2 +- drivers/watchdog/mpc8xxx_wdt.c | 2 +- drivers/xen/xen-pciback/conf_space.c | 2 +- drivers/xen/xen-pciback/xenbus.c | 2 +- fs/lockd/mon.c | 2 +- fs/nfs/client.c | 2 +- fs/nfs/inode.c | 2 +- include/acpi/acpixf.h | 2 +- include/acpi/apei.h | 4 ++-- include/linux/console.h | 2 +- include/linux/lockd/lockd.h | 2 +- include/linux/mmc/host.h | 2 +- security/apparmor/include/apparmor.h | 10 +++++----- security/apparmor/lsm.c | 12 ++++++------ virt/kvm/iommu.c | 2 +- 283 files changed, 471 insertions(+), 465 deletions(-) (limited to 'fs') diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c index cb423f5aef2..c339a0880e6 100644 --- a/drivers/accessibility/braille/braille_console.c +++ b/drivers/accessibility/braille/braille_console.c @@ -44,7 +44,7 @@ MODULE_LICENSE("GPL"); */ /* Emit various sounds */ -static int sound; +static bool sound; module_param(sound, bool, 0); MODULE_PARM_DESC(sound, "emit sounds"); diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 76dc02f1557..e6652d716e4 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -108,7 +108,7 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE); /* * Optionally enable output from the AML Debug Object. */ -u32 ACPI_INIT_GLOBAL(acpi_gbl_enable_aml_debug_object, FALSE); +bool ACPI_INIT_GLOBAL(acpi_gbl_enable_aml_debug_object, FALSE); /* * Optionally copy the entire DSDT to local memory (instead of simply diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index b8e08cb67a1..ebaf037a787 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -118,7 +118,7 @@ struct ghes_estatus_cache { struct rcu_head rcu; }; -int ghes_disable; +bool ghes_disable; module_param_named(disable, ghes_disable, bool, 0); static int ghes_panic_timeout __read_mostly = 30; diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 05fee06f4d6..ee7fddc4665 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -41,7 +41,7 @@ #define HEST_PFX "HEST: " -int hest_disable; +bool hest_disable; EXPORT_SYMBOL_GPL(hest_disable); /* HEST table parsing */ diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 19a61136d84..88eb1430466 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -43,7 +43,7 @@ MODULE_AUTHOR("Kristen Carlson Accardi"); MODULE_DESCRIPTION(ACPI_DOCK_DRIVER_DESCRIPTION); MODULE_LICENSE("GPL"); -static int immediate_undock = 1; +static bool immediate_undock = 1; module_param(immediate_undock, bool, 0644); MODULE_PARM_DESC(immediate_undock, "1 (default) will cause the driver to " "undock immediately when the undock button is pressed, 0 will cause" diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c index 07f7fea8a4e..e50e31a518a 100644 --- a/drivers/acpi/pci_slot.c +++ b/drivers/acpi/pci_slot.c @@ -34,7 +34,7 @@ #include #include -static int debug; +static bool debug; static int check_sta_before_sun; #define DRIVER_VERSION "0.1" diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 08a44b532f7..eaef02afc7c 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -69,21 +69,21 @@ MODULE_AUTHOR("Bruno Ducrot"); MODULE_DESCRIPTION("ACPI Video Driver"); MODULE_LICENSE("GPL"); -static int brightness_switch_enabled = 1; +static bool brightness_switch_enabled = 1; module_param(brightness_switch_enabled, bool, 0644); /* * By default, 
we don't allow duplicate ACPI video bus devices * under the same VGA controller */ -static int allow_duplicates; +static bool allow_duplicates; module_param(allow_duplicates, bool, 0644); /* * Some BIOSes claim they use minimum backlight at boot, * and this may bring dimming screen after boot */ -static int use_bios_initial_backlight = 1; +static bool use_bios_initial_backlight = 1; module_param(use_bios_initial_backlight, bool, 0644); static int register_count = 0; diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index e0bc9646a38..55d6179dde5 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -599,9 +599,9 @@ MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(pci, nv_pci_tbl); MODULE_VERSION(DRV_VERSION); -static int adma_enabled; -static int swncq_enabled = 1; -static int msi_enabled; +static bool adma_enabled; +static bool swncq_enabled = 1; +static bool msi_enabled; static void nv_adma_register_mode(struct ata_port *ap) { diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index 1e9140626a8..e7e610aa9a7 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -417,7 +417,7 @@ static struct ata_port_operations sil24_ops = { #endif }; -static int sata_sil24_msi; /* Disable MSI */ +static bool sata_sil24_msi; /* Disable MSI */ module_param_named(msi, sata_sil24_msi, bool, S_IRUGO); MODULE_PARM_DESC(msi, "Enable MSI (Default: false)"); diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 9a51df4f5b7..b182c2f7d77 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -112,12 +112,12 @@ static u8 read_prom_byte(struct he_dev *he_dev, int addr); /* globals */ static struct he_dev *he_devs; -static int disable64; +static bool disable64; static short nvpibits = -1; static short nvcibits = -1; static short rx_skb_reserve = 16; -static int irq_coalesce = 1; -static int sdh = 0; +static bool irq_coalesce = 1; +static bool sdh = 0; /* Read from EEPROM = 0000 0011b */ static unsigned int readtab[] = { diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9cf20355cee..8d680562ba7 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -59,8 +59,8 @@ /* module parameter, defined in drbd_main.c */ extern unsigned int minor_count; -extern int disable_sendpage; -extern int allow_oos; +extern bool disable_sendpage; +extern bool allow_oos; extern unsigned int cn_idx; #ifdef CONFIG_DRBD_FAULT_INJECTION diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0358e55356c..211fc44f84b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -117,8 +117,8 @@ module_param(fault_devs, int, 0644); /* module parameter, defined */ unsigned int minor_count = DRBD_MINOR_COUNT_DEF; -int disable_sendpage; -int allow_oos; +bool disable_sendpage; +bool allow_oos; unsigned int cn_idx = CN_IDX_DRBD; int proc_details; /* Detail level in proc drbd*/ diff --git a/drivers/block/paride/bpck6.c b/drivers/block/paride/bpck6.c index ad124525ac2..ec64e7f5d1c 100644 --- a/drivers/block/paride/bpck6.c +++ b/drivers/block/paride/bpck6.c @@ -20,9 +20,6 @@ */ -/* PARAMETERS */ -static int verbose; /* set this to 1 to see debugging messages and whatnot */ - #define BACKPACK_VERSION "2.0.2" #include @@ -36,6 +33,8 @@ static int verbose; /* set this to 1 to see debugging messages and whatnot */ #include "ppc6lnx.c" #include "paride.h" +/* PARAMETERS */ +static bool verbose; /* set this to 1 to see debugging messages and whatnot */ #define PPCSTRUCT(pi) ((Interface *)(pi->private)) diff 
--git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 869e7676d46..831e3ac156e 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -124,8 +124,9 @@ by default. */ +#include -static int verbose = 0; +static bool verbose = 0; static int major = PD_MAJOR; static char *name = PD_NAME; static int cluster = 64; diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index f21b520ef41..ec8f9ed6326 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -118,13 +118,15 @@ #define PF_NAME "pf" #define PF_UNITS 4 +#include + /* Here are things one can override from the insmod command. Most are autoprobed by paride unless set here. Verbose is off by default. */ -static int verbose = 0; +static bool verbose = 0; static int major = PF_MAJOR; static char *name = PF_NAME; static int cluster = 64; diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index a79fb4f7ff6..4a27b1de5fc 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -130,13 +130,14 @@ #define PI_PG 4 #endif +#include /* Here are things one can override from the insmod command. Most are autoprobed by paride unless set here. Verbose is 0 by default. */ -static int verbose = 0; +static bool verbose = 0; static int major = PG_MAJOR; static char *name = PG_NAME; static int disable = 0; diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 7179f79d746..2596042eb98 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -109,13 +109,15 @@ #define PT_NAME "pt" #define PT_UNITS 4 +#include + /* Here are things one can override from the insmod command. Most are autoprobed by paride unless set here. Verbose is on by default. */ -static int verbose = 0; +static bool verbose = 0; static int major = PT_MAJOR; static char *name = PT_NAME; static int disable = 0; diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 4abd2bcd20f..51a972704db 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -148,7 +148,7 @@ static volatile int xdc_busy; static struct timer_list xd_watchdog_int; static volatile u_char xd_error; -static int nodma = XD_DONT_USE_DMA; +static bool nodma = XD_DONT_USE_DMA; static struct request_queue *xd_queue; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 55ac349695c..f00f596c102 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -37,13 +37,13 @@ #define VERSION "0.6" -static int ignore_dga; -static int ignore_csr; -static int ignore_sniffer; -static int disable_scofix; -static int force_scofix; +static bool ignore_dga; +static bool ignore_csr; +static bool ignore_sniffer; +static bool disable_scofix; +static bool force_scofix; -static int reset = 1; +static bool reset = 1; static struct usb_driver btusb_driver; diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index 9c5b2dc38e2..a767d4de45a 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -49,8 +49,8 @@ #define VERSION "0.3" -static int txcrc = 1; -static int hciextn = 1; +static bool txcrc = 1; +static bool hciextn = 1; #define BCSP_TXWINSIZE 4 diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 48ad2a7ab08..07114489994 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -48,7 +48,7 @@ #define VERSION "2.2" -static int reset = 0; +static bool reset = 0; static struct hci_uart_proto *hup[HCI_UART_MAX_PROTO]; diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 
2118211aff9..1bbf7645a97 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -285,17 +285,17 @@ #include /* used to tell the module to turn on full debugging messages */ -static int debug; +static bool debug; /* used to keep tray locked at all times */ static int keeplocked; /* default compatibility mode */ -static int autoclose=1; -static int autoeject; -static int lockdoor = 1; +static bool autoclose=1; +static bool autoeject; +static bool lockdoor = 1; /* will we ever get to use this... sigh. */ -static int check_media_type; +static bool check_media_type; /* automatically restart mrw format */ -static int mrw_format_restart = 1; +static bool mrw_format_restart = 1; module_param(debug, bool, 0); module_param(autoclose, bool, 0); module_param(autoeject, bool, 0); diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 780498d7658..444f8b6ab41 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -33,7 +33,7 @@ #define ULI_X86_64_ENU_SCR_REG 0x54 static struct resource *aperture_resource; -static int __initdata agp_try_unsupported = 1; +static bool __initdata agp_try_unsupported = 1; static int agp_bridges_found; static void amd64_tlbflush(struct agp_memory *temp) diff --git a/drivers/char/agp/sis-agp.c b/drivers/char/agp/sis-agp.c index 29aacd81de7..08704ae5395 100644 --- a/drivers/char/agp/sis-agp.c +++ b/drivers/char/agp/sis-agp.c @@ -17,7 +17,7 @@ #define PCI_DEVICE_ID_SI_662 0x0662 #define PCI_DEVICE_ID_SI_671 0x0671 -static int __devinitdata agp_sis_force_delay = 0; +static bool __devinitdata agp_sis_force_delay = 0; static int __devinitdata agp_sis_agp_spec = -1; static int sis_fetch_size(void) diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index 6e40072fbf6..40cc0cf2ded 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -69,19 +69,19 @@ MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)"); MODULE_DESCRIPTION("Driver for accessing SMM BIOS on Dell laptops"); MODULE_LICENSE("GPL"); -static int force; +static bool force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Force loading without checking for supported models"); -static int ignore_dmi; +static bool ignore_dmi; module_param(ignore_dmi, bool, 0); MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match"); -static int restricted; +static bool restricted; module_param(restricted, bool, 0); MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set"); -static int power_status; +static bool power_status; module_param(power_status, bool, 0600); MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k"); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 9397ab49b72..50fcf9c0456 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -1227,7 +1227,7 @@ static int smi_num; /* Used to sequence the SMIs */ #define DEFAULT_REGSPACING 1 #define DEFAULT_REGSIZE 1 -static int si_trydefaults = 1; +static bool si_trydefaults = 1; static char *si_type[SI_MAX_PARMS]; #define MAX_SI_TYPE_STR 30 static char si_type_str[MAX_SI_TYPE_STR]; diff --git a/drivers/char/lp.c b/drivers/char/lp.c index 97c3edb95ae..f4348560706 100644 --- a/drivers/char/lp.c +++ b/drivers/char/lp.c @@ -829,7 +829,7 @@ static struct console lpcons = { static int parport_nr[LP_NO] = { [0 ... 
LP_NO-1] = LP_PARPORT_UNSPEC }; static char *parport[LP_NO]; -static int reset; +static bool reset; module_param_array(parport, charp, NULL, 0); module_param(reset, bool, 0); diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c index a12f52400db..bf586ae1ee8 100644 --- a/drivers/char/nwflash.c +++ b/drivers/char/nwflash.c @@ -51,7 +51,7 @@ static int write_block(unsigned long p, const char __user *buf, int count); #define KFLASH_ID 0x89A6 //Intel flash #define KFLASH_ID4 0xB0D4 //Intel flash 4Meg -static int flashdebug; //if set - we will display progress msgs +static bool flashdebug; //if set - we will display progress msgs static int gbWriteEnable; static int gbWriteBase64Enable; diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 15781396af2..07f6a5abe37 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -439,7 +439,7 @@ static int mgslpc_device_count = 0; * .text section address and breakpoint on module load. * This is useful for use with gdb and add-symbol-file command. */ -static int break_on_load=0; +static bool break_on_load=0; /* * Driver major number, defaults to zero to get auto diff --git a/drivers/char/random.c b/drivers/char/random.c index 85da8740586..732215b805c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -387,7 +387,7 @@ static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); static struct fasync_struct *fasync; #if 0 -static int debug; +static bool debug; module_param(debug, bool, 0644); #define DEBUG_ENT(fmt, arg...) do { \ if (debug) \ diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 10cc44ceb5d..a1748621111 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -255,7 +255,7 @@ out: return size; } -static int itpm; +static bool itpm; module_param(itpm, bool, 0444); MODULE_PARM_DESC(itpm, "Force iTPM workarounds (found on some Lenovo laptops)"); @@ -500,7 +500,7 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id) return IRQ_HANDLED; } -static int interrupts = 1; +static bool interrupts = 1; module_param(interrupts, bool, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts"); @@ -828,7 +828,7 @@ static struct platform_driver tis_drv = { static struct platform_device *pdev; -static int force; +static bool force; module_param(force, bool, 0444); MODULE_PARM_DESC(force, "Force device probe rather than using ACPI entry"); static int __init init_tis(void) diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index b153674431f..e294e1b3616 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -131,7 +131,7 @@ struct r82600_error_info { u32 eapr; }; -static unsigned int disable_hardware_scrub; +static bool disable_hardware_scrub; static struct edac_pci_ctl_info *r82600_pci; diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 68375bc3aef..80e95aa3bf1 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -66,7 +66,7 @@ * * Concurrent logins are useful together with cluster filesystems. 
*/ -static int sbp2_param_exclusive_login = 1; +static bool sbp2_param_exclusive_login = 1; module_param_named(exclusive_login, sbp2_param_exclusive_login, bool, 0644); MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device " "(default = Y, use N for concurrent initiators)"); diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index f779009104e..b71b77ab0dc 100644 --- a/drivers/hid/hid-prodikeys.c +++ b/drivers/hid/hid-prodikeys.c @@ -90,7 +90,7 @@ static const char longname[] = "Prodikeys PC-MIDI Keyboard"; static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; -static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; +static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; module_param_array(index, int, NULL, 0444); module_param_array(id, charp, NULL, 0444); diff --git a/drivers/hwmon/abituguru.c b/drivers/hwmon/abituguru.c index 65a35cf5b3c..3b728e8f169 100644 --- a/drivers/hwmon/abituguru.c +++ b/drivers/hwmon/abituguru.c @@ -145,7 +145,7 @@ static const u8 abituguru_pwm_max[5] = { 0, 255, 255, 75, 75 }; /* Insmod parameters */ -static int force; +static bool force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Set to one to force detection."); static int bank1_types[ABIT_UGURU_MAX_BANK1_SENSORS] = { -1, -1, -1, -1, -1, diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c index d30855a7578..34a14a77e00 100644 --- a/drivers/hwmon/abituguru3.c +++ b/drivers/hwmon/abituguru3.c @@ -603,11 +603,11 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { /* Insmod parameters */ -static int force; +static bool force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Set to one to force detection."); /* Default verbose is 1, since this driver is still in the testing phase */ -static int verbose = 1; +static bool verbose = 1; module_param(verbose, bool, 0644); MODULE_PARM_DESC(verbose, "Enable/disable verbose error reporting"); diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 522860ab6ce..554f046bcf2 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -58,7 +58,7 @@ ACPI_MODULE_NAME(ACPI_POWER_METER_NAME); #define POWER_ALARM_NAME "power1_alarm" static int cap_in_hardware; -static int force_cap_on; +static bool force_cap_on; static int can_cap_in_hardware(void) { diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c index 1ad0a885c5a..0158cc35cb2 100644 --- a/drivers/hwmon/adm1021.c +++ b/drivers/hwmon/adm1021.c @@ -103,7 +103,7 @@ static int adm1021_remove(struct i2c_client *client); static struct adm1021_data *adm1021_update_device(struct device *dev); /* (amalysh) read only mode, otherwise any limit's writing confuse BIOS */ -static int read_only; +static bool read_only; static const struct i2c_device_id adm1021_id[] = { diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c index cfcc3b6fb6b..ed60242d6a0 100644 --- a/drivers/hwmon/ads7828.c +++ b/drivers/hwmon/ads7828.c @@ -48,8 +48,8 @@ static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, I2C_CLIENT_END }; /* Module parameters */ -static int se_input = 1; /* Default is SE, 0 == diff */ -static int int_vref = 1; /* Default is internal ref ON */ +static bool se_input = 1; /* Default is SE, 0 == diff */ +static bool int_vref = 1; /* Default is internal ref ON */ static int vref_mv = ADS7828_INT_VREF_MV; /* set if vref != 2.5V */ module_param(se_input, bool, S_IRUGO); module_param(int_vref, bool, 
S_IRUGO); diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c index d9803958e49..ffb229af786 100644 --- a/drivers/hwmon/dme1737.c +++ b/drivers/hwmon/dme1737.c @@ -45,7 +45,7 @@ static struct platform_device *pdev; /* Module load parameters */ -static int force_start; +static bool force_start; module_param(force_start, bool, 0); MODULE_PARM_DESC(force_start, "Force the chip to start monitoring inputs"); @@ -53,7 +53,7 @@ static unsigned short force_id; module_param(force_id, ushort, 0); MODULE_PARM_DESC(force_id, "Override the detected device ID"); -static int probe_all_addr; +static bool probe_all_addr; module_param(probe_all_addr, bool, 0); MODULE_PARM_DESC(probe_all_addr, "Include probing of non-standard LPC " "addresses"); diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index 38c0b87676d..603ef2af270 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -146,10 +146,10 @@ static inline void superio_exit(void) #define IT87_SIO_BEEP_PIN_REG 0xf6 /* Beep pin mapping */ /* Update battery voltage after every reading if true */ -static int update_vbat; +static bool update_vbat; /* Not all BIOSes properly configure the PWM registers */ -static int fix_pwm_polarity; +static bool fix_pwm_polarity; /* Many IT87 constants specified below */ diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c index 3b43df41861..8bd6c5c9e05 100644 --- a/drivers/hwmon/lm93.c +++ b/drivers/hwmon/lm93.c @@ -151,12 +151,12 @@ static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; /* Insmod parameters */ -static int disable_block; +static bool disable_block; module_param(disable_block, bool, 0); MODULE_PARM_DESC(disable_block, "Set to non-zero to disable SMBus block data transactions."); -static int init; +static bool init; module_param(init, bool, 0); MODULE_PARM_DESC(init, "Set to non-zero to force chip initialization."); diff --git a/drivers/hwmon/max1668.c b/drivers/hwmon/max1668.c index 6914195cfd3..88953f99e91 100644 --- a/drivers/hwmon/max1668.c +++ b/drivers/hwmon/max1668.c @@ -59,7 +59,7 @@ static unsigned short max1668_addr_list[] = { #define DEV_ID_MAX1989 0xb /* read only mode module parameter */ -static int read_only; +static bool read_only; module_param(read_only, bool, 0); MODULE_PARM_DESC(read_only, "Don't set any values, read only mode"); diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c index bde50e34d01..374118f2b9f 100644 --- a/drivers/hwmon/w83627hf.c +++ b/drivers/hwmon/w83627hf.c @@ -71,7 +71,7 @@ module_param(force_i2c, byte, 0); MODULE_PARM_DESC(force_i2c, "Initialize the i2c address of the sensors"); -static int init = 1; +static bool init = 1; module_param(init, bool, 0); MODULE_PARM_DESC(init, "Set to zero to bypass chip initialization"); diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c index 65b685e2c7b..17a8fa2d9ae 100644 --- a/drivers/hwmon/w83781d.c +++ b/drivers/hwmon/w83781d.c @@ -67,11 +67,11 @@ module_param_array(force_subclients, short, NULL, 0); MODULE_PARM_DESC(force_subclients, "List of subclient addresses: " "{bus, clientaddr, subclientaddr1, subclientaddr2}"); -static int reset; +static bool reset; module_param(reset, bool, 0); MODULE_PARM_DESC(reset, "Set to one to reset chip on load"); -static int init = 1; +static bool init = 1; module_param(init, bool, 0); MODULE_PARM_DESC(init, "Set to zero to bypass chip initialization"); diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c index 6e5d0ae594b..35aa5149307 100644 --- a/drivers/hwmon/w83791d.c +++ b/drivers/hwmon/w83791d.c @@ 
-58,11 +58,11 @@ module_param_array(force_subclients, short, NULL, 0); MODULE_PARM_DESC(force_subclients, "List of subclient addresses: " "{bus, clientaddr, subclientaddr1, subclientaddr2}"); -static int reset; +static bool reset; module_param(reset, bool, 0); MODULE_PARM_DESC(reset, "Set to one to force a hardware chip reset"); -static int init; +static bool init; module_param(init, bool, 0); MODULE_PARM_DESC(init, "Set to one to force extra software initialization"); diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c index 9ded133e43f..d3100eab6b2 100644 --- a/drivers/hwmon/w83792d.c +++ b/drivers/hwmon/w83792d.c @@ -56,7 +56,7 @@ module_param_array(force_subclients, short, NULL, 0); MODULE_PARM_DESC(force_subclients, "List of subclient addresses: " "{bus, clientaddr, subclientaddr1, subclientaddr2}"); -static int init; +static bool init; module_param(init, bool, 0); MODULE_PARM_DESC(init, "Set to one to force chip initialization"); diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c index 3cc6fef2208..45ec7e7c3c2 100644 --- a/drivers/hwmon/w83793.c +++ b/drivers/hwmon/w83793.c @@ -61,7 +61,7 @@ module_param_array(force_subclients, short, NULL, 0); MODULE_PARM_DESC(force_subclients, "List of subclient addresses: " "{bus, clientaddr, subclientaddr1, subclientaddr2}"); -static int reset; +static bool reset; module_param(reset, bool, 0); MODULE_PARM_DESC(reset, "Set to 1 to reset chip, not recommended"); diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index 3ee398d0e4c..aa58b25565b 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -42,7 +42,7 @@ static const unsigned short normal_i2c[] = { }; -static int reset; +static bool reset; module_param(reset, bool, 0); MODULE_PARM_DESC(reset, "Set to 1 to reset chip, not recommended"); diff --git a/drivers/hwmon/w83l786ng.c b/drivers/hwmon/w83l786ng.c index 0254e181893..063bd9508d8 100644 --- a/drivers/hwmon/w83l786ng.c +++ b/drivers/hwmon/w83l786ng.c @@ -39,7 +39,7 @@ static const unsigned short normal_i2c[] = { 0x2e, 0x2f, I2C_CLIENT_END }; /* Insmod parameters */ -static int reset; +static bool reset; module_param(reset, bool, 0); MODULE_PARM_DESC(reset, "Set to 1 to reset chip, not recommended"); diff --git a/drivers/i2c/busses/i2c-highlander.c b/drivers/i2c/busses/i2c-highlander.c index 63bb1cc2a04..fa88868cb55 100644 --- a/drivers/i2c/busses/i2c-highlander.c +++ b/drivers/i2c/busses/i2c-highlander.c @@ -52,7 +52,7 @@ struct highlander_i2c_dev { size_t buf_len; }; -static int iic_force_poll, iic_force_normal; +static bool iic_force_poll, iic_force_normal; static int iic_timeout = 1000, iic_read_delay; static inline void highlander_i2c_irq_enable(struct highlander_i2c_dev *dev) diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index 3c110fbc409..c08ceb957aa 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -51,11 +51,11 @@ MODULE_DESCRIPTION("IBM IIC driver v" DRIVER_VERSION); MODULE_LICENSE("GPL"); -static int iic_force_poll; +static bool iic_force_poll; module_param(iic_force_poll, bool, 0); MODULE_PARM_DESC(iic_force_poll, "Force polling mode"); -static int iic_force_fast; +static bool iic_force_fast; module_param(iic_force_fast, bool, 0); MODULE_PARM_DESC(iic_force_fast, "Force fast mode (400 kHz)"); diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c index e6f539e26f6..58893772c3d 100644 --- a/drivers/i2c/busses/i2c-sis630.c +++ b/drivers/i2c/busses/i2c-sis630.c @@ -93,8 +93,8 @@ static struct 
pci_driver sis630_driver; /* insmod parameters */ -static int high_clock; -static int force; +static bool high_clock; +static bool force; module_param(high_clock, bool, 0); MODULE_PARM_DESC(high_clock, "Set Host Master Clock to 56KHz (default 14KHz)."); module_param(force, bool, 0); diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c index 0b012f1f8ac..2a62c998044 100644 --- a/drivers/i2c/busses/i2c-viapro.c +++ b/drivers/i2c/busses/i2c-viapro.c @@ -91,7 +91,7 @@ static unsigned short SMBHSTCFG = 0xD2; /* If force is set to anything different from 0, we forcibly enable the VT596. DANGEROUS! */ -static int force; +static bool force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Forcibly enable the SMBus. DANGEROUS!"); diff --git a/drivers/ide/ali14xx.c b/drivers/ide/ali14xx.c index 25b9fe3a9f8..d3be99fb415 100644 --- a/drivers/ide/ali14xx.c +++ b/drivers/ide/ali14xx.c @@ -221,7 +221,7 @@ static int __init ali14xx_probe(void) return ide_legacy_device_add(&ali14xx_port_info, 0); } -static int probe_ali14xx; +static bool probe_ali14xx; module_param_named(probe, probe_ali14xx, bool, 0); MODULE_PARM_DESC(probe, "probe for ALI M14xx chipsets"); diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c index a81bd757579..14717304b38 100644 --- a/drivers/ide/cmd640.c +++ b/drivers/ide/cmd640.c @@ -111,7 +111,7 @@ #define DRV_NAME "cmd640" -static int cmd640_vlb; +static bool cmd640_vlb; /* * CMD640 specific registers definition. diff --git a/drivers/ide/dtc2278.c b/drivers/ide/dtc2278.c index 6929f7fce93..46af4743b3e 100644 --- a/drivers/ide/dtc2278.c +++ b/drivers/ide/dtc2278.c @@ -130,7 +130,7 @@ static int __init dtc2278_probe(void) return ide_legacy_device_add(&dtc2278_port_info, 0); } -static int probe_dtc2278; +static bool probe_dtc2278; module_param_named(probe, probe_dtc2278, bool, 0); MODULE_PARM_DESC(probe, "probe for DTC2278xx chipsets"); diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c index 3feaa26410b..51beb85250d 100644 --- a/drivers/ide/gayle.c +++ b/drivers/ide/gayle.c @@ -50,7 +50,7 @@ GAYLE_NUM_HWIFS-1) #define GAYLE_HAS_CONTROL_REG (!ide_doubler) -static int ide_doubler; +static bool ide_doubler; module_param_named(doubler, ide_doubler, bool, 0); MODULE_PARM_DESC(doubler, "enable support for IDE doublers"); diff --git a/drivers/ide/ht6560b.c b/drivers/ide/ht6560b.c index 808bcdcbf8e..986f2513eab 100644 --- a/drivers/ide/ht6560b.c +++ b/drivers/ide/ht6560b.c @@ -317,7 +317,7 @@ static void __init ht6560b_init_dev(ide_drive_t *drive) ide_set_drivedata(drive, (void *)t); } -static int probe_ht6560b; +static bool probe_ht6560b; module_param_named(probe, probe_ht6560b, bool, 0); MODULE_PARM_DESC(probe, "probe for HT6560B chipset"); diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c index 979d342c338..547d7cf2e01 100644 --- a/drivers/ide/ide-4drives.c +++ b/drivers/ide/ide-4drives.c @@ -6,7 +6,7 @@ #define DRV_NAME "ide-4drives" -static int probe_4drives; +static bool probe_4drives; module_param_named(probe, probe_4drives, bool, 0); MODULE_PARM_DESC(probe, "probe for generic IDE chipset with 4 drives/port"); diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c index f22edc66b03..f1a6796b165 100644 --- a/drivers/ide/ide-acpi.c +++ b/drivers/ide/ide-acpi.c @@ -53,15 +53,15 @@ struct ide_acpi_hwif_link { #define DEBPRINT(fmt, args...) 
do {} while (0) #endif /* DEBUGGING */ -static int ide_noacpi; +static bool ide_noacpi; module_param_named(noacpi, ide_noacpi, bool, 0); MODULE_PARM_DESC(noacpi, "disable IDE ACPI support"); -static int ide_acpigtf; +static bool ide_acpigtf; module_param_named(acpigtf, ide_acpigtf, bool, 0); MODULE_PARM_DESC(acpigtf, "enable IDE ACPI _GTF support"); -static int ide_acpionboot; +static bool ide_acpionboot; module_param_named(acpionboot, ide_acpionboot, bool, 0); MODULE_PARM_DESC(acpionboot, "call IDE ACPI methods on boot"); diff --git a/drivers/ide/ide-pci-generic.c b/drivers/ide/ide-pci-generic.c index a743e68a890..7f56b738d76 100644 --- a/drivers/ide/ide-pci-generic.c +++ b/drivers/ide/ide-pci-generic.c @@ -28,7 +28,7 @@ #define DRV_NAME "ide_pci_generic" -static int ide_generic_all; /* Set to claim all devices */ +static bool ide_generic_all; /* Set to claim all devices */ module_param_named(all_generic_ide, ide_generic_all, bool, 0444); MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE storage controllers."); diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c index 3f0244fd8e6..8bbfe5557c7 100644 --- a/drivers/ide/qd65xx.c +++ b/drivers/ide/qd65xx.c @@ -417,7 +417,7 @@ static int __init qd_probe(int base) return rc; } -static int probe_qd65xx; +static bool probe_qd65xx; module_param_named(probe, probe_qd65xx, bool, 0); MODULE_PARM_DESC(probe, "probe for QD65xx chipsets"); diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c index 47adcd09cb2..5cfb7812066 100644 --- a/drivers/ide/umc8672.c +++ b/drivers/ide/umc8672.c @@ -160,7 +160,7 @@ static int __init umc8672_probe(void) return ide_legacy_device_add(&umc8672_port_info, 0); } -static int probe_umc8672; +static bool probe_umc8672; module_param_named(probe, probe_umc8672, bool, 0); MODULE_PARM_DESC(probe, "probe for UMC8672 chipset"); diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index aaf6023a483..f08f6eaf3fa 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -379,8 +379,8 @@ extern spinlock_t shca_list_lock; extern int ehca_static_rate; extern int ehca_port_act_time; -extern int ehca_use_hp_mr; -extern int ehca_scaling_code; +extern bool ehca_use_hp_mr; +extern bool ehca_scaling_code; extern int ehca_lock_hcalls; extern int ehca_nr_ports; extern int ehca_max_cq; diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 8af8d4f7bdb..832e7a7d0ae 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -59,16 +59,16 @@ MODULE_AUTHOR("Christoph Raisch "); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); MODULE_VERSION(HCAD_VERSION); -static int ehca_open_aqp1 = 0; +static bool ehca_open_aqp1 = 0; static int ehca_hw_level = 0; -static int ehca_poll_all_eqs = 1; +static bool ehca_poll_all_eqs = 1; int ehca_debug_level = 0; int ehca_nr_ports = -1; -int ehca_use_hp_mr = 0; +bool ehca_use_hp_mr = 0; int ehca_port_act_time = 30; int ehca_static_rate = -1; -int ehca_scaling_code = 0; +bool ehca_scaling_code = 0; int ehca_lock_hcalls = -1; int ehca_max_cq = -1; int ehca_max_qp = -1; diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 5965b3df8f2..7013da5e9ed 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -96,7 +96,7 @@ unsigned int wqm_quanta = 0x10000; module_param(wqm_quanta, int, 0644); MODULE_PARM_DESC(wqm_quanta, "WQM quanta"); 
-static unsigned int limit_maxrdreqsz; +static bool limit_maxrdreqsz; module_param(limit_maxrdreqsz, bool, 0644); MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes"); diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 32bbd4c77b7..fd7a0d5bc94 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -98,15 +98,15 @@ #define XTYPE_XBOX360W 2 #define XTYPE_UNKNOWN 3 -static int dpad_to_buttons; +static bool dpad_to_buttons; module_param(dpad_to_buttons, bool, S_IRUGO); MODULE_PARM_DESC(dpad_to_buttons, "Map D-PAD to buttons rather than axes for unknown pads"); -static int triggers_to_buttons; +static bool triggers_to_buttons; module_param(triggers_to_buttons, bool, S_IRUGO); MODULE_PARM_DESC(triggers_to_buttons, "Map triggers to buttons rather than axes for unknown pads"); -static int sticks_to_null; +static bool sticks_to_null; module_param(sticks_to_null, bool, S_IRUGO); MODULE_PARM_DESC(sticks_to_null, "Do not map sticks at all for unknown pads"); diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c index 52b41934898..e2bdfd4bea7 100644 --- a/drivers/input/misc/wistron_btns.c +++ b/drivers/input/misc/wistron_btns.c @@ -48,7 +48,7 @@ MODULE_DESCRIPTION("Wistron laptop button driver"); MODULE_LICENSE("GPL v2"); MODULE_VERSION("0.3"); -static int force; /* = 0; */ +static bool force; /* = 0; */ module_param(force, bool, 0); MODULE_PARM_DESC(force, "Load even if computer is not in database"); diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index de7e8bc17b1..e6c9931f02c 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -60,7 +60,7 @@ static unsigned int psmouse_rate = 100; module_param_named(rate, psmouse_rate, uint, 0644); MODULE_PARM_DESC(rate, "Report rate, in reports per second."); -static unsigned int psmouse_smartscroll = 1; +static bool psmouse_smartscroll = 1; module_param_named(smartscroll, psmouse_smartscroll, bool, 0644); MODULE_PARM_DESC(smartscroll, "Logitech Smartscroll autorepeat, 1 = enabled (default), 0 = disabled."); diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c index 4b755cb5b38..1c58aafa523 100644 --- a/drivers/input/mouse/synaptics_i2c.c +++ b/drivers/input/mouse/synaptics_i2c.c @@ -185,17 +185,17 @@ #define NO_DATA_SLEEP_MSECS (MSEC_PER_SEC / 4) /* Control touchpad's No Deceleration option */ -static int no_decel = 1; +static bool no_decel = 1; module_param(no_decel, bool, 0644); MODULE_PARM_DESC(no_decel, "No Deceleration. Default = 1 (on)"); /* Control touchpad's Reduced Reporting option */ -static int reduce_report; +static bool reduce_report; module_param(reduce_report, bool, 0644); MODULE_PARM_DESC(reduce_report, "Reduced Reporting. Default = 0 (off)"); /* Control touchpad's No Filter option */ -static int no_filter; +static bool no_filter; module_param(no_filter, bool, 0644); MODULE_PARM_DESC(no_filter, "No Filter. 
Default = 0 (off)"); diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 979c443bf1e..be3316073ae 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -105,7 +105,7 @@ EXPORT_SYMBOL(__hp_sdc_enqueue_transaction); EXPORT_SYMBOL(hp_sdc_enqueue_transaction); EXPORT_SYMBOL(hp_sdc_dequeue_transaction); -static unsigned int hp_sdc_disabled; +static bool hp_sdc_disabled; module_param_named(no_hpsdc, hp_sdc_disabled, bool, 0); MODULE_PARM_DESC(no_hpsdc, "Do not enable HP SDC driver."); diff --git a/drivers/input/touchscreen/eeti_ts.c b/drivers/input/touchscreen/eeti_ts.c index 7f8f538a980..1df19bb8534 100644 --- a/drivers/input/touchscreen/eeti_ts.c +++ b/drivers/input/touchscreen/eeti_ts.c @@ -35,11 +35,11 @@ #include #include -static int flip_x; +static bool flip_x; module_param(flip_x, bool, 0644); MODULE_PARM_DESC(flip_x, "flip x coordinate"); -static int flip_y; +static bool flip_y; module_param(flip_y, bool, 0644); MODULE_PARM_DESC(flip_y, "flip y coordinate"); diff --git a/drivers/input/touchscreen/htcpen.c b/drivers/input/touchscreen/htcpen.c index 81e33862394..d13143b68b3 100644 --- a/drivers/input/touchscreen/htcpen.c +++ b/drivers/input/touchscreen/htcpen.c @@ -40,10 +40,10 @@ MODULE_LICENSE("GPL"); #define X_AXIS_MAX 2040 #define Y_AXIS_MAX 2040 -static int invert_x; +static bool invert_x; module_param(invert_x, bool, 0644); MODULE_PARM_DESC(invert_x, "If set, X axis is inverted"); -static int invert_y; +static bool invert_y; module_param(invert_y, bool, 0644); MODULE_PARM_DESC(invert_y, "If set, Y axis is inverted"); diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c index d2b57536fee..46e83ad53f4 100644 --- a/drivers/input/touchscreen/ucb1400_ts.c +++ b/drivers/input/touchscreen/ucb1400_ts.c @@ -30,7 +30,7 @@ #define UCB1400_TS_POLL_PERIOD 10 /* ms */ -static int adcsync; +static bool adcsync; static int ts_delay = 55; /* us */ static int ts_delay_pressure; /* us */ diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c index 06cef3ccc63..3a5ebf452e8 100644 --- a/drivers/input/touchscreen/usbtouchscreen.c +++ b/drivers/input/touchscreen/usbtouchscreen.c @@ -60,11 +60,11 @@ #define DRIVER_AUTHOR "Daniel Ritz " #define DRIVER_DESC "USB Touchscreen Driver" -static int swap_xy; +static bool swap_xy; module_param(swap_xy, bool, 0644); MODULE_PARM_DESC(swap_xy, "If set X and Y axes are swapped."); -static int hwcalib_xy; +static bool hwcalib_xy; module_param(hwcalib_xy, bool, 0644); MODULE_PARM_DESC(hwcalib_xy, "If set hw-calibrated X/Y are used if available"); diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c index 9c8d7aa053c..a0ed668d4d2 100644 --- a/drivers/isdn/hardware/avm/b1dma.c +++ b/drivers/isdn/hardware/avm/b1dma.c @@ -40,7 +40,7 @@ MODULE_DESCRIPTION("CAPI4Linux: DMA support for active AVM cards"); MODULE_AUTHOR("Carsten Paeth"); MODULE_LICENSE("GPL"); -static int suppress_pollack = 0; +static bool suppress_pollack = 0; module_param(suppress_pollack, bool, 0); /* ------------------------------------------------------------- */ diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c index d3530f6e811..9743b24ef9d 100644 --- a/drivers/isdn/hardware/avm/c4.c +++ b/drivers/isdn/hardware/avm/c4.c @@ -40,7 +40,7 @@ static char *revision = "$Revision: 1.1.2.2 $"; /* ------------------------------------------------------------- */ -static int suppress_pollack; +static bool suppress_pollack; 
static struct pci_device_id c4_pci_tbl[] = { { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21285, PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_C4, 0, 0, (unsigned long)4 }, diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c index ca710ab278e..023de789f25 100644 --- a/drivers/isdn/sc/init.c +++ b/drivers/isdn/sc/init.c @@ -30,7 +30,7 @@ static const char *boardname[] = { "DataCommute/BRI", "DataCommute/PRI", "TeleCo static unsigned int io[] = {0,0,0,0}; static unsigned char irq[] = {0,0,0,0}; static unsigned long ram[] = {0,0,0,0}; -static int do_reset = 0; +static bool do_reset = 0; module_param_array(io, int, NULL, 0); module_param_array(irq, int, NULL, 0); diff --git a/drivers/leds/leds-clevo-mail.c b/drivers/leds/leds-clevo-mail.c index a498135a4e8..1ed1677c916 100644 --- a/drivers/leds/leds-clevo-mail.c +++ b/drivers/leds/leds-clevo-mail.c @@ -18,7 +18,7 @@ MODULE_AUTHOR("Márton Németh "); MODULE_DESCRIPTION("Clevo mail LED driver"); MODULE_LICENSE("GPL"); -static unsigned int __initdata nodetect; +static bool __initdata nodetect; module_param_named(nodetect, nodetect, bool, 0); MODULE_PARM_DESC(nodetect, "Skip DMI hardware detection"); diff --git a/drivers/leds/leds-ss4200.c b/drivers/leds/leds-ss4200.c index 614ebebaaa2..57371e1485a 100644 --- a/drivers/leds/leds-ss4200.c +++ b/drivers/leds/leds-ss4200.c @@ -79,7 +79,7 @@ static int __init ss4200_led_dmi_callback(const struct dmi_system_id *id) return 1; } -static unsigned int __initdata nodetect; +static bool __initdata nodetect; module_param_named(nodetect, nodetect, bool, 0); MODULE_PARM_DESC(nodetect, "Skip DMI-based hardware detection"); diff --git a/drivers/macintosh/ams/ams-core.c b/drivers/macintosh/ams/ams-core.c index 399beb1638d..5c6a2d87656 100644 --- a/drivers/macintosh/ams/ams-core.c +++ b/drivers/macintosh/ams/ams-core.c @@ -31,7 +31,7 @@ /* There is only one motion sensor per machine */ struct ams ams_info; -static unsigned int verbose; +static bool verbose; module_param(verbose, bool, 0644); MODULE_PARM_DESC(verbose, "Show free falls and shocks in kernel output"); diff --git a/drivers/macintosh/ams/ams-input.c b/drivers/macintosh/ams/ams-input.c index 8a712392cd3..b27e530a87a 100644 --- a/drivers/macintosh/ams/ams-input.c +++ b/drivers/macintosh/ams/ams-input.c @@ -19,11 +19,11 @@ #include "ams.h" -static unsigned int joystick; +static bool joystick; module_param(joystick, bool, S_IRUGO); MODULE_PARM_DESC(joystick, "Enable the input class device on module load"); -static unsigned int invert; +static bool invert; module_param(invert, bool, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(invert, "Invert input data on X and Y axis"); diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index 02367308ff2..c60d025044e 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -52,7 +52,7 @@ static const char *sensor_location[3]; static int limit_adjust; static int fan_speed = -1; -static int verbose; +static bool verbose; MODULE_AUTHOR("Colin Leroy "); MODULE_DESCRIPTION("Driver for ADT746x thermostat in iBook G4 and " diff --git a/drivers/media/dvb/dvb-usb/af9005.c b/drivers/media/dvb/dvb-usb/af9005.c index bd51a764351..4fc024d7704 100644 --- a/drivers/media/dvb/dvb-usb/af9005.c +++ b/drivers/media/dvb/dvb-usb/af9005.c @@ -30,7 +30,7 @@ MODULE_PARM_DESC(debug, "set debugging level (1=info,xfer=2,rc=4,reg=8,i2c=16,fw=32 (or-able))." 
DVB_USB_DEBUG_STATUS); /* enable obnoxious led */ -int dvb_usb_af9005_led = 1; +bool dvb_usb_af9005_led = 1; module_param_named(led, dvb_usb_af9005_led, bool, 0644); MODULE_PARM_DESC(led, "enable led (default: 1)."); diff --git a/drivers/media/dvb/dvb-usb/af9005.h b/drivers/media/dvb/dvb-usb/af9005.h index c71c77bd7f4..6a2bf3de845 100644 --- a/drivers/media/dvb/dvb-usb/af9005.h +++ b/drivers/media/dvb/dvb-usb/af9005.h @@ -35,7 +35,7 @@ extern int dvb_usb_af9005_debug; #define deb_i2c(args...) dprintk(dvb_usb_af9005_debug,0x10,args) #define deb_fw(args...) dprintk(dvb_usb_af9005_debug,0x20,args) -extern int dvb_usb_af9005_led; +extern bool dvb_usb_af9005_led; /* firmware */ #define FW_BULKOUT_SIZE 250 diff --git a/drivers/media/radio/radio-gemtek.c b/drivers/media/radio/radio-gemtek.c index edadc8449a3..36ce0611c03 100644 --- a/drivers/media/radio/radio-gemtek.c +++ b/drivers/media/radio/radio-gemtek.c @@ -47,11 +47,11 @@ MODULE_VERSION("0.0.4"); #endif static int io = CONFIG_RADIO_GEMTEK_PORT; -static int probe = CONFIG_RADIO_GEMTEK_PROBE; -static int hardmute; -static int shutdown = 1; -static int keepmuted = 1; -static int initmute = 1; +static bool probe = CONFIG_RADIO_GEMTEK_PROBE; +static bool hardmute; +static bool shutdown = 1; +static bool keepmuted = 1; +static bool initmute = 1; static int radio_nr = -1; module_param(io, int, 0444); diff --git a/drivers/media/radio/radio-miropcm20.c b/drivers/media/radio/radio-miropcm20.c index 3fb76e3834c..87c1ee13b05 100644 --- a/drivers/media/radio/radio-miropcm20.c +++ b/drivers/media/radio/radio-miropcm20.c @@ -23,7 +23,7 @@ static int radio_nr = -1; module_param(radio_nr, int, 0); MODULE_PARM_DESC(radio_nr, "Set radio device number (/dev/radioX). Default: -1 (autodetect)"); -static int mono; +static bool mono; module_param(mono, bool, 0); MODULE_PARM_DESC(mono, "Force tuner into mono mode."); diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 27997a9ceb0..ca12d3289bf 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -38,7 +38,7 @@ #include #include -static int debug; +static bool debug; #define IRCTL_DEV_NAME "BaseRemoteCtl" #define NOPLUG -1 diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c index 20bb12d6fbb..21105bf9594 100644 --- a/drivers/media/rc/mceusb.c +++ b/drivers/media/rc/mceusb.c @@ -156,9 +156,9 @@ /* module parameters */ #ifdef CONFIG_USB_DEBUG -static int debug = 1; +static bool debug = 1; #else -static int debug; +static bool debug; #endif #define mce_dbg(dev, fmt, ...) 
\ diff --git a/drivers/media/rc/streamzap.c b/drivers/media/rc/streamzap.c index b1d29d09eea..d6f4bfe0939 100644 --- a/drivers/media/rc/streamzap.c +++ b/drivers/media/rc/streamzap.c @@ -43,9 +43,9 @@ #define DRIVER_DESC "Streamzap Remote Control driver" #ifdef CONFIG_USB_DEBUG -static int debug = 1; +static bool debug = 1; #else -static int debug; +static bool debug; #endif #define USB_STREAMZAP_VENDOR_ID 0x0e9c diff --git a/drivers/media/rc/winbond-cir.c b/drivers/media/rc/winbond-cir.c index e7f7a57bf68..b09c5fae489 100644 --- a/drivers/media/rc/winbond-cir.c +++ b/drivers/media/rc/winbond-cir.c @@ -226,11 +226,11 @@ module_param(protocol, uint, 0444); MODULE_PARM_DESC(protocol, "IR protocol to use for the power-on command " "(0 = RC5, 1 = NEC, 2 = RC6A, default)"); -static int invert; /* default = 0 */ +static bool invert; /* default = 0 */ module_param(invert, bool, 0444); MODULE_PARM_DESC(invert, "Invert the signal from the IR receiver"); -static int txandrx; /* default = 0 */ +static bool txandrx; /* default = 0 */ module_param(txandrx, bool, 0444); MODULE_PARM_DESC(invert, "Allow simultaneous TX and RX"); diff --git a/drivers/media/video/c-qcam.c b/drivers/media/video/c-qcam.c index cd8ff047318..fda32f52554 100644 --- a/drivers/media/video/c-qcam.c +++ b/drivers/media/video/c-qcam.c @@ -72,7 +72,7 @@ struct qcam { static int parport[MAX_CAMS] = { [1 ... MAX_CAMS-1] = -1 }; static int probe = 2; -static int force_rgb; +static bool force_rgb; static int video_nr = -1; /* FIXME: parport=auto would never have worked, surely? --RR */ diff --git a/drivers/media/video/cs5345.c b/drivers/media/video/cs5345.c index 5909f2557ab..1d64af9adf7 100644 --- a/drivers/media/video/cs5345.c +++ b/drivers/media/video/cs5345.c @@ -31,7 +31,7 @@ MODULE_DESCRIPTION("i2c device driver for cs5345 Audio ADC"); MODULE_AUTHOR("Hans Verkuil"); MODULE_LICENSE("GPL"); -static int debug; +static bool debug; module_param(debug, bool, 0644); diff --git a/drivers/media/video/cs53l32a.c b/drivers/media/video/cs53l32a.c index d93e5ab45fd..51c5b9ad67d 100644 --- a/drivers/media/video/cs53l32a.c +++ b/drivers/media/video/cs53l32a.c @@ -35,7 +35,7 @@ MODULE_DESCRIPTION("i2c device driver for cs53l32a Audio ADC"); MODULE_AUTHOR("Martin Vaughan"); MODULE_LICENSE("GPL"); -static int debug; +static bool debug; module_param(debug, bool, 0644); diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c index c6ff32a6137..349bd9c2aff 100644 --- a/drivers/media/video/cx18/cx18-driver.c +++ b/drivers/media/video/cx18/cx18-driver.c @@ -75,7 +75,7 @@ static int radio[CX18_MAX_CARDS] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; static unsigned cardtype_c = 1; static unsigned tuner_c = 1; -static unsigned radio_c = 1; +static bool radio_c = 1; static char pal[] = "--"; static char secam[] = "--"; static char ntsc[] = "-"; diff --git a/drivers/media/video/cx25821/cx25821-alsa.c b/drivers/media/video/cx25821/cx25821-alsa.c index 09e99de5fd2..58be4f3bb3c 100644 --- a/drivers/media/video/cx25821/cx25821-alsa.c +++ b/drivers/media/video/cx25821/cx25821-alsa.c @@ -102,7 +102,7 @@ struct cx25821_audio_dev { static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ -static int enable[SNDRV_CARDS] = { 1, [1 ... (SNDRV_CARDS - 1)] = 1 }; +static bool enable[SNDRV_CARDS] = { 1, [1 ... 
(SNDRV_CARDS - 1)] = 1 }; module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable cx25821 soundcard. default enabled."); diff --git a/drivers/media/video/cx88/cx88-alsa.c b/drivers/media/video/cx88/cx88-alsa.c index 68d1240f493..04bf6627d36 100644 --- a/drivers/media/video/cx88/cx88-alsa.c +++ b/drivers/media/video/cx88/cx88-alsa.c @@ -96,7 +96,7 @@ typedef struct cx88_audio_dev snd_cx88_card_t; static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static const char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ -static int enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1}; +static bool enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1}; module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable cx88x soundcard. default enabled."); diff --git a/drivers/media/video/gspca/m5602/m5602_core.c b/drivers/media/video/gspca/m5602/m5602_core.c index 9fe3816b2aa..0c449367543 100644 --- a/drivers/media/video/gspca/m5602/m5602_core.c +++ b/drivers/media/video/gspca/m5602/m5602_core.c @@ -27,8 +27,8 @@ /* Kernel module parameters */ int force_sensor; -static int dump_bridge; -int dump_sensor; +static bool dump_bridge; +bool dump_sensor; static const struct usb_device_id m5602_table[] = { {USB_DEVICE(0x0402, 0x5602)}, diff --git a/drivers/media/video/gspca/m5602/m5602_mt9m111.h b/drivers/media/video/gspca/m5602/m5602_mt9m111.h index b1f0c492036..8c672b5c8c6 100644 --- a/drivers/media/video/gspca/m5602/m5602_mt9m111.h +++ b/drivers/media/video/gspca/m5602/m5602_mt9m111.h @@ -106,7 +106,7 @@ /* Kernel module parameters */ extern int force_sensor; -extern int dump_sensor; +extern bool dump_sensor; int mt9m111_probe(struct sd *sd); int mt9m111_init(struct sd *sd); diff --git a/drivers/media/video/gspca/m5602/m5602_ov7660.h b/drivers/media/video/gspca/m5602/m5602_ov7660.h index 2efd607987e..2b6a13b508f 100644 --- a/drivers/media/video/gspca/m5602/m5602_ov7660.h +++ b/drivers/media/video/gspca/m5602/m5602_ov7660.h @@ -86,7 +86,7 @@ /* Kernel module parameters */ extern int force_sensor; -extern int dump_sensor; +extern bool dump_sensor; int ov7660_probe(struct sd *sd); int ov7660_init(struct sd *sd); diff --git a/drivers/media/video/gspca/m5602/m5602_ov9650.h b/drivers/media/video/gspca/m5602/m5602_ov9650.h index da9a129b739..f7aa5bf6898 100644 --- a/drivers/media/video/gspca/m5602/m5602_ov9650.h +++ b/drivers/media/video/gspca/m5602/m5602_ov9650.h @@ -135,7 +135,7 @@ /* Kernel module parameters */ extern int force_sensor; -extern int dump_sensor; +extern bool dump_sensor; int ov9650_probe(struct sd *sd); int ov9650_init(struct sd *sd); diff --git a/drivers/media/video/gspca/m5602/m5602_po1030.h b/drivers/media/video/gspca/m5602/m5602_po1030.h index 33835959639..81a2bcb88fe 100644 --- a/drivers/media/video/gspca/m5602/m5602_po1030.h +++ b/drivers/media/video/gspca/m5602/m5602_po1030.h @@ -147,7 +147,7 @@ /* Kernel module parameters */ extern int force_sensor; -extern int dump_sensor; +extern bool dump_sensor; int po1030_probe(struct sd *sd); int po1030_init(struct sd *sd); diff --git a/drivers/media/video/gspca/m5602/m5602_s5k4aa.h b/drivers/media/video/gspca/m5602/m5602_s5k4aa.h index 8cc7a3f6da7..8e0035e731c 100644 --- a/drivers/media/video/gspca/m5602/m5602_s5k4aa.h +++ b/drivers/media/video/gspca/m5602/m5602_s5k4aa.h @@ -65,7 +65,7 @@ /* Kernel module parameters */ extern int force_sensor; -extern int dump_sensor; +extern bool dump_sensor; int s5k4aa_probe(struct sd *sd); int s5k4aa_init(struct sd *sd); diff --git 
a/drivers/media/video/gspca/m5602/m5602_s5k83a.h b/drivers/media/video/gspca/m5602/m5602_s5k83a.h index 80a63a236e2..79952247b53 100644 --- a/drivers/media/video/gspca/m5602/m5602_s5k83a.h +++ b/drivers/media/video/gspca/m5602/m5602_s5k83a.h @@ -41,7 +41,7 @@ /* Kernel module parameters */ extern int force_sensor; -extern int dump_sensor; +extern bool dump_sensor; int s5k83a_probe(struct sd *sd); int s5k83a_init(struct sd *sd); diff --git a/drivers/media/video/gspca/stv06xx/stv06xx.c b/drivers/media/video/gspca/stv06xx/stv06xx.c index 0ab425fbea9..6f878f6c6e9 100644 --- a/drivers/media/video/gspca/stv06xx/stv06xx.c +++ b/drivers/media/video/gspca/stv06xx/stv06xx.c @@ -36,8 +36,8 @@ MODULE_AUTHOR("Erik Andrén"); MODULE_DESCRIPTION("STV06XX USB Camera Driver"); MODULE_LICENSE("GPL"); -static int dump_bridge; -static int dump_sensor; +static bool dump_bridge; +static bool dump_sensor; int stv06xx_write_bridge(struct sd *sd, u16 address, u16 i2c_data) { diff --git a/drivers/media/video/hdpvr/hdpvr-core.c b/drivers/media/video/hdpvr/hdpvr-core.c index 3f1a5b1beeb..e5eb56a5b61 100644 --- a/drivers/media/video/hdpvr/hdpvr-core.c +++ b/drivers/media/video/hdpvr/hdpvr-core.c @@ -49,7 +49,7 @@ module_param(default_audio_input, uint, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(default_audio_input, "default audio input: 0=RCA back / " "1=RCA front / 2=S/PDIF"); -static int boost_audio; +static bool boost_audio; module_param(boost_audio, bool, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(boost_audio, "boost the audio signal"); diff --git a/drivers/media/video/ivtv/ivtv-driver.c b/drivers/media/video/ivtv/ivtv-driver.c index 41108a9a195..544af91cbdc 100644 --- a/drivers/media/video/ivtv/ivtv-driver.c +++ b/drivers/media/video/ivtv/ivtv-driver.c @@ -99,7 +99,7 @@ static int i2c_clock_period[IVTV_MAX_CARDS] = { -1, -1, -1, -1, -1, -1, -1, -1, static unsigned int cardtype_c = 1; static unsigned int tuner_c = 1; -static unsigned int radio_c = 1; +static bool radio_c = 1; static unsigned int i2c_clock_period_c = 1; static char pal[] = "---"; static char secam[] = "--"; diff --git a/drivers/media/video/ivtv/ivtvfb.c b/drivers/media/video/ivtv/ivtvfb.c index 6b7c9c82333..d0fbfcf7133 100644 --- a/drivers/media/video/ivtv/ivtvfb.c +++ b/drivers/media/video/ivtv/ivtvfb.c @@ -58,7 +58,7 @@ /* card parameters */ static int ivtvfb_card_id = -1; static int ivtvfb_debug = 0; -static int osd_laced; +static bool osd_laced; static int osd_depth; static int osd_upper; static int osd_left; diff --git a/drivers/media/video/marvell-ccic/mcam-core.c b/drivers/media/video/marvell-ccic/mcam-core.c index 80ec64d2d6d..2c8fc0f6d69 100644 --- a/drivers/media/video/marvell-ccic/mcam-core.c +++ b/drivers/media/video/marvell-ccic/mcam-core.c @@ -51,7 +51,7 @@ static int delivered; * sense. 
*/ -static int alloc_bufs_at_read; +static bool alloc_bufs_at_read; module_param(alloc_bufs_at_read, bool, 0444); MODULE_PARM_DESC(alloc_bufs_at_read, "Non-zero value causes DMA buffers to be allocated when the " @@ -73,11 +73,11 @@ MODULE_PARM_DESC(dma_buf_size, "parameters require larger buffers, an attempt to reallocate " "will be made."); #else /* MCAM_MODE_VMALLOC */ -static const int alloc_bufs_at_read = 0; +static const bool alloc_bufs_at_read = 0; static const int n_dma_bufs = 3; /* Used by S/G_PARM */ #endif /* MCAM_MODE_VMALLOC */ -static int flip; +static bool flip; module_param(flip, bool, 0444); MODULE_PARM_DESC(flip, "If set, the sensor will be instructed to flip the image " diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c index d0f53885728..d7cd0f633f6 100644 --- a/drivers/media/video/msp3400-driver.c +++ b/drivers/media/video/msp3400-driver.c @@ -69,12 +69,12 @@ MODULE_LICENSE("GPL"); /* module parameters */ static int opmode = OPMODE_AUTO; int msp_debug; /* msp_debug output */ -int msp_once; /* no continuous stereo monitoring */ -int msp_amsound; /* hard-wire AM sound at 6.5 Hz (france), +bool msp_once; /* no continuous stereo monitoring */ +bool msp_amsound; /* hard-wire AM sound at 6.5 Hz (france), the autoscan seems work well only with FM... */ int msp_standard = 1; /* Override auto detect of audio msp_standard, if needed. */ -int msp_dolby; +bool msp_dolby; int msp_stereo_thresh = 0x190; /* a2 threshold for stereo/bilingual (msp34xxg only) 0x00a0-0x03c0 */ diff --git a/drivers/media/video/msp3400-driver.h b/drivers/media/video/msp3400-driver.h index 831e8db4368..fbe5e0715f9 100644 --- a/drivers/media/video/msp3400-driver.h +++ b/drivers/media/video/msp3400-driver.h @@ -44,10 +44,10 @@ /* module parameters */ extern int msp_debug; -extern int msp_once; -extern int msp_amsound; +extern bool msp_once; +extern bool msp_amsound; extern int msp_standard; -extern int msp_dolby; +extern bool msp_dolby; extern int msp_stereo_thresh; struct msp_state { diff --git a/drivers/media/video/omap/omap_vout.c b/drivers/media/video/omap/omap_vout.c index ee0d0b39cd1..0de598bf66b 100644 --- a/drivers/media/video/omap/omap_vout.c +++ b/drivers/media/video/omap/omap_vout.c @@ -70,9 +70,9 @@ static u32 video1_numbuffers = 3; static u32 video2_numbuffers = 3; static u32 video1_bufsize = OMAP_VOUT_MAX_BUF_SIZE; static u32 video2_bufsize = OMAP_VOUT_MAX_BUF_SIZE; -static u32 vid1_static_vrfb_alloc; -static u32 vid2_static_vrfb_alloc; -static int debug; +static bool vid1_static_vrfb_alloc; +static bool vid2_static_vrfb_alloc; +static bool debug; /* Module parameters */ module_param(video1_numbuffers, uint, S_IRUGO); diff --git a/drivers/media/video/omap/omap_vout_vrfb.c b/drivers/media/video/omap/omap_vout_vrfb.c index ebebcac4922..4be26abf6ce 100644 --- a/drivers/media/video/omap/omap_vout_vrfb.c +++ b/drivers/media/video/omap/omap_vout_vrfb.c @@ -84,7 +84,7 @@ void omap_vout_free_vrfb_buffers(struct omap_vout_device *vout) } int omap_vout_setup_vrfb_bufs(struct platform_device *pdev, int vid_num, - u32 static_vrfb_allocation) + bool static_vrfb_allocation) { int ret = 0, i, j; struct omap_vout_device *vout; diff --git a/drivers/media/video/ov7670.c b/drivers/media/video/ov7670.c index 8aa05853128..6a564964853 100644 --- a/drivers/media/video/ov7670.c +++ b/drivers/media/video/ov7670.c @@ -25,7 +25,7 @@ MODULE_AUTHOR("Jonathan Corbet "); MODULE_DESCRIPTION("A low-level driver for OmniVision ov7670 sensors"); MODULE_LICENSE("GPL"); -static int debug; 
+static bool debug; module_param(debug, bool, 0644); MODULE_PARM_DESC(debug, "Debug level (0-1)"); diff --git a/drivers/media/video/saa7115.c b/drivers/media/video/saa7115.c index 5cfdbc78b91..0ef5484696b 100644 --- a/drivers/media/video/saa7115.c +++ b/drivers/media/video/saa7115.c @@ -57,7 +57,7 @@ MODULE_AUTHOR( "Maxim Yevtyushkin, Kevin Thayer, Chris Kennedy, " "Hans Verkuil, Mauro Carvalho Chehab"); MODULE_LICENSE("GPL"); -static int debug; +static bool debug; module_param(debug, bool, 0644); MODULE_PARM_DESC(debug, "Debug level (0-1)"); diff --git a/drivers/media/video/stk-webcam.c b/drivers/media/video/stk-webcam.c index b7fb5a5cad7..3c61aec517a 100644 --- a/drivers/media/video/stk-webcam.c +++ b/drivers/media/video/stk-webcam.c @@ -38,11 +38,11 @@ #include "stk-webcam.h" -static int hflip = 1; +static bool hflip = 1; module_param(hflip, bool, 0444); MODULE_PARM_DESC(hflip, "Horizontal image flip (mirror). Defaults to 1"); -static int vflip = 1; +static bool vflip = 1; module_param(vflip, bool, 0444); MODULE_PARM_DESC(vflip, "Vertical image flip. Defaults to 1"); diff --git a/drivers/media/video/tm6000/tm6000-alsa.c b/drivers/media/video/tm6000/tm6000-alsa.c index 7d675c72fd4..bb2047c1035 100644 --- a/drivers/media/video/tm6000/tm6000-alsa.c +++ b/drivers/media/video/tm6000/tm6000-alsa.c @@ -42,7 +42,7 @@ static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ -static int enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1}; +static bool enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1}; module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable tm6000x soundcard. default enabled."); diff --git a/drivers/media/video/tvp514x.c b/drivers/media/video/tvp514x.c index 926f0393115..dd26cacd055 100644 --- a/drivers/media/video/tvp514x.c +++ b/drivers/media/video/tvp514x.c @@ -52,7 +52,7 @@ #define LOCK_RETRY_DELAY (200) /* Debug functions */ -static int debug; +static bool debug; module_param(debug, bool, 0644); MODULE_PARM_DESC(debug, "Debug level (0-1)"); diff --git a/drivers/media/video/tvp7002.c b/drivers/media/video/tvp7002.c index 7875e80cb2f..236c559d5f5 100644 --- a/drivers/media/video/tvp7002.c +++ b/drivers/media/video/tvp7002.c @@ -63,7 +63,7 @@ MODULE_LICENSE("GPL"); #define TVP7002_CL_MASK 0x0f /* Debug functions */ -static int debug; +static bool debug; module_param(debug, bool, 0644); MODULE_PARM_DESC(debug, "Debug level (0-2)"); diff --git a/drivers/media/video/upd64083.c b/drivers/media/video/upd64083.c index 9bbe61700fd..65d065aa609 100644 --- a/drivers/media/video/upd64083.c +++ b/drivers/media/video/upd64083.c @@ -34,7 +34,7 @@ MODULE_DESCRIPTION("uPD64083 driver"); MODULE_AUTHOR("T. 
Adachi, Takeru KOMORIYA, Hans Verkuil"); MODULE_LICENSE("GPL"); -static int debug; +static bool debug; module_param(debug, bool, 0644); MODULE_PARM_DESC(debug, "Debug level (0-1)"); diff --git a/drivers/media/video/via-camera.c b/drivers/media/video/via-camera.c index cbf13d09b4a..bfae41ba53c 100644 --- a/drivers/media/video/via-camera.c +++ b/drivers/media/video/via-camera.c @@ -34,13 +34,13 @@ MODULE_AUTHOR("Jonathan Corbet "); MODULE_DESCRIPTION("VIA framebuffer-based camera controller driver"); MODULE_LICENSE("GPL"); -static int flip_image; +static bool flip_image; module_param(flip_image, bool, 0444); MODULE_PARM_DESC(flip_image, "If set, the sensor will be instructed to flip the image " "vertically."); -static int override_serial; +static bool override_serial; module_param(override_serial, bool, 0444); MODULE_PARM_DESC(override_serial, "The camera driver will normally refuse to load if " diff --git a/drivers/media/video/zoran/zoran_device.c b/drivers/media/video/zoran/zoran_device.c index e8a27844bf3..e86173bd132 100644 --- a/drivers/media/video/zoran/zoran_device.c +++ b/drivers/media/video/zoran/zoran_device.c @@ -57,7 +57,7 @@ ZR36057_ISR_GIRQ1 | \ ZR36057_ISR_JPEGRepIRQ ) -static int lml33dpath; /* default = 0 +static bool lml33dpath; /* default = 0 * 1 will use digital path in capture * mode instead of analog. It can be * used for picture adjustments using diff --git a/drivers/media/video/zoran/zr36060.c b/drivers/media/video/zoran/zr36060.c index 5e4f57cbf31..f08546fe223 100644 --- a/drivers/media/video/zoran/zr36060.c +++ b/drivers/media/video/zoran/zr36060.c @@ -50,7 +50,7 @@ /* amount of chips attached via this driver */ static int zr36060_codecs; -static int low_bitrate; +static bool low_bitrate; module_param(low_bitrate, bool, 0); MODULE_PARM_DESC(low_bitrate, "Buz compatibility option, halves bitrate"); diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index 6ce70e9615d..5319e9b6584 100644 --- a/drivers/memstick/host/jmb38x_ms.c +++ b/drivers/memstick/host/jmb38x_ms.c @@ -21,7 +21,7 @@ #define DRIVER_NAME "jmb38x_ms" -static int no_dma; +static bool no_dma; module_param(no_dma, bool, 0644); enum { diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index 668f5c6a039..29b2172ae18 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -23,7 +23,7 @@ #include #include "r592.h" -static int r592_enable_dma = 1; +static bool r592_enable_dma = 1; static int debug; static const char *tpc_names[] = { diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c index b7aacf47703..6902b83eb1b 100644 --- a/drivers/memstick/host/tifm_ms.c +++ b/drivers/memstick/host/tifm_ms.c @@ -22,7 +22,7 @@ #define DRIVER_NAME "tifm_ms" -static int no_dma; +static bool no_dma; module_param(no_dma, bool, 0644); /* diff --git a/drivers/misc/iwmc3200top/main.c b/drivers/misc/iwmc3200top/main.c index b1f4563be9a..701eb600b12 100644 --- a/drivers/misc/iwmc3200top/main.c +++ b/drivers/misc/iwmc3200top/main.c @@ -376,20 +376,20 @@ static int blocks; module_param(blocks, int, 0604); MODULE_PARM_DESC(blocks, "max_blocks_to_send"); -static int dump; +static bool dump; module_param(dump, bool, 0604); MODULE_PARM_DESC(dump, "dump_hex_content"); -static int jump = 1; +static bool jump = 1; module_param(jump, bool, 0604); -static int direct = 1; +static bool direct = 1; module_param(direct, bool, 0604); -static int checksum = 1; +static bool checksum = 1; module_param(checksum, bool, 0604); -static int fw_download = 
1; +static bool fw_download = 1; module_param(fw_download, bool, 0604); static int block_size = IWMC_SDIO_BLK_SIZE; @@ -398,7 +398,7 @@ module_param(block_size, int, 0404); static int download_trans_blks = IWMC_DEFAULT_TR_BLK; module_param(download_trans_blks, int, 0604); -static int rubbish_barker; +static bool rubbish_barker; module_param(rubbish_barker, bool, 0604); #ifdef CONFIG_IWMC3200TOP_DEBUG diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 950b97d7412..75d7d7e1736 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -48,7 +48,7 @@ static struct workqueue_struct *workqueue; * performance cost, and for other reasons may not always be desired. * So we allow it it to be disabled. */ -int use_spi_crc = 1; +bool use_spi_crc = 1; module_param(use_spi_crc, bool, 0); /* @@ -58,9 +58,9 @@ module_param(use_spi_crc, bool, 0); * overridden if necessary. */ #ifdef CONFIG_MMC_UNSAFE_RESUME -int mmc_assume_removable; +bool mmc_assume_removable; #else -int mmc_assume_removable = 1; +bool mmc_assume_removable = 1; #endif EXPORT_SYMBOL(mmc_assume_removable); module_param_named(removable, mmc_assume_removable, bool, 0644); diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 14664f1fb16..afa6bd2b7b7 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -64,7 +64,7 @@ int mmc_attach_sd(struct mmc_host *host); int mmc_attach_sdio(struct mmc_host *host); /* Module parameters */ -extern int use_spi_crc; +extern bool use_spi_crc; /* Debugfs information for hosts and cards */ void mmc_add_host_debugfs(struct mmc_host *host); diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c index f70d04664ca..69d249f51d6 100644 --- a/drivers/mmc/host/tifm_sd.c +++ b/drivers/mmc/host/tifm_sd.c @@ -22,8 +22,8 @@ #define DRIVER_NAME "tifm_sd" #define DRIVER_VERSION "0.8" -static int no_dma = 0; -static int fixed_timeout = 0; +static bool no_dma = 0; +static bool fixed_timeout = 0; module_param(no_dma, bool, 0644); module_param(fixed_timeout, bool, 0644); diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index 2ec978bc32b..3135a1a5d75 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -223,25 +223,25 @@ enum SD_RESPONSE_TYPE { #define FUN(c) (0x000007 & (c->arg>>28)) #define REG(c) (0x01FFFF & (c->arg>>9)) -static int limit_speed_to_24_MHz; +static bool limit_speed_to_24_MHz; module_param(limit_speed_to_24_MHz, bool, 0644); MODULE_PARM_DESC(limit_speed_to_24_MHz, "Limit Max SDIO Clock Speed to 24 MHz"); -static int pad_input_to_usb_pkt; +static bool pad_input_to_usb_pkt; module_param(pad_input_to_usb_pkt, bool, 0644); MODULE_PARM_DESC(pad_input_to_usb_pkt, "Pad USB data input transfers to whole USB Packet"); -static int disable_offload_processing; +static bool disable_offload_processing; module_param(disable_offload_processing, bool, 0644); MODULE_PARM_DESC(disable_offload_processing, "Disable Offload Processing"); -static int force_1_bit_data_xfers; +static bool force_1_bit_data_xfers; module_param(force_1_bit_data_xfers, bool, 0644); MODULE_PARM_DESC(force_1_bit_data_xfers, "Force SDIO Data Transfers to 1-bit Mode"); -static int force_polling_for_irqs; +static bool force_polling_for_irqs; module_param(force_polling_for_irqs, bool, 0644); MODULE_PARM_DESC(force_polling_for_irqs, "Force Polling for SDIO interrupts"); diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 8544d6bf50a..5c3d719c37e 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ 
b/drivers/mtd/nand/pxa3xx_nand.c @@ -185,7 +185,7 @@ struct pxa3xx_nand_info { uint32_t ndcb2; }; -static int use_dma = 1; +static bool use_dma = 1; module_param(use_dma, bool, 0444); MODULE_PARM_DESC(use_dma, "enable DMA for data transferring to/from NAND HW"); diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c index f20f393bfda..769a4e096b3 100644 --- a/drivers/mtd/nand/r852.c +++ b/drivers/mtd/nand/r852.c @@ -22,7 +22,7 @@ #include "r852.h" -static int r852_enable_dma = 1; +static bool r852_enable_dma = 1; module_param(r852_enable_dma, bool, S_IRUGO); MODULE_PARM_DESC(r852_enable_dma, "Enable usage of the DMA (default)"); diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c index 0dc34f12f92..d4716273651 100644 --- a/drivers/parport/parport_ip32.c +++ b/drivers/parport/parport_ip32.c @@ -135,7 +135,7 @@ #define PARPORT_IP32_ENABLE_EPP (1U << 3) #define PARPORT_IP32_ENABLE_ECP (1U << 4) static unsigned int features = ~0U; -static int verbose_probing = DEFAULT_VERBOSE_PROBING; +static bool verbose_probing = DEFAULT_VERBOSE_PROBING; /* We do not support more than one port. */ static struct parport *this_port = NULL; diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 095f29e1373..2a47e82821d 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -44,7 +44,7 @@ #define METHOD_NAME__SUN "_SUN" #define METHOD_NAME_OSHP "OSHP" -static int debug_acpi; +static bool debug_acpi; static acpi_status decode_type0_hpx_record(union acpi_object *record, struct hotplug_params *hpx) diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c index efa9f2de51c..aa41631e9e0 100644 --- a/drivers/pci/hotplug/acpiphp_core.c +++ b/drivers/pci/hotplug/acpiphp_core.c @@ -47,7 +47,7 @@ /* name size which is used for entries in pcihpfs */ #define SLOT_NAME_SIZE 21 /* {_SUN} */ -static int debug; +static bool debug; int acpiphp_debug; /* local variables */ diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index e525263210e..c35e8ad6db0 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -43,7 +43,7 @@ #define DRIVER_AUTHOR "Irene Zubarev , Vernon Mauery " #define DRIVER_DESC "ACPI Hot Plug PCI Controller Driver IBM extension" -static int debug; +static bool debug; MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/pci/hotplug/cpcihp_zt5550.c b/drivers/pci/hotplug/cpcihp_zt5550.c index 41f6a8d79c8..6bf8d2ab164 100644 --- a/drivers/pci/hotplug/cpcihp_zt5550.c +++ b/drivers/pci/hotplug/cpcihp_zt5550.c @@ -57,8 +57,8 @@ #define warn(format, arg...) 
printk(KERN_WARNING "%s: " format "\n", MY_NAME , ## arg) /* local variables */ -static int debug; -static int poll; +static bool debug; +static bool poll; static struct cpci_hp_controller_ops zt5550_hpc_ops; static struct cpci_hp_controller zt5550_hpc; diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index f1ce99cceac..187a199da93 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -57,8 +57,8 @@ struct irq_routing_table *cpqhp_routing_table; static void __iomem *smbios_table; static void __iomem *smbios_start; static void __iomem *cpqhp_rom_start; -static int power_mode; -static int debug; +static bool power_mode; +static bool debug; static int initialized; #define DRIVER_VERSION "0.9.8" diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index d934dd4fa87..5506e0e8fbc 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -49,7 +49,7 @@ int ibmphp_debug; -static int debug; +static bool debug; module_param(debug, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC (debug, "Debugging mode enabled or not"); MODULE_LICENSE ("GPL"); diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 6d2eea93298..202f4a969eb 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -51,7 +51,7 @@ /* local variables */ -static int debug; +static bool debug; #define DRIVER_VERSION "0.5" #define DRIVER_AUTHOR "Greg Kroah-Hartman , Scott Murray " diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 9a33fdde2d1..4b7cce1de6e 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -40,10 +40,10 @@ #define MY_NAME "pciehp" -extern int pciehp_poll_mode; +extern bool pciehp_poll_mode; extern int pciehp_poll_time; -extern int pciehp_debug; -extern int pciehp_force; +extern bool pciehp_debug; +extern bool pciehp_force; extern struct workqueue_struct *pciehp_wq; #define dbg(format, arg...) \ diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index b8c99d35ac9..365c6b96c64 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -38,10 +38,10 @@ #include /* Global variables */ -int pciehp_debug; -int pciehp_poll_mode; +bool pciehp_debug; +bool pciehp_poll_mode; int pciehp_poll_time; -int pciehp_force; +bool pciehp_force; struct workqueue_struct *pciehp_wq; #define DRIVER_VERSION "0.4" diff --git a/drivers/pci/hotplug/pcihp_skeleton.c b/drivers/pci/hotplug/pcihp_skeleton.c index 5175d9b26f0..b20ceaaa31f 100644 --- a/drivers/pci/hotplug/pcihp_skeleton.c +++ b/drivers/pci/hotplug/pcihp_skeleton.c @@ -59,7 +59,7 @@ static LIST_HEAD(slot_list); #define warn(format, arg...) printk(KERN_WARNING "%s: " format "\n", MY_NAME , ## arg) /* local variables */ -static int debug; +static bool debug; static int num_slots; #define DRIVER_VERSION "0.3" diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h index 419919a87b0..df5677440a0 100644 --- a/drivers/pci/hotplug/rpaphp.h +++ b/drivers/pci/hotplug/rpaphp.h @@ -46,7 +46,7 @@ #define PRESENT 1 /* Card in slot */ #define MY_NAME "rpaphp" -extern int rpaphp_debug; +extern bool rpaphp_debug; #define dbg(format, arg...) 
\ do { \ if (rpaphp_debug) \ diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 758adb5f47f..127d6e60018 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -37,7 +37,7 @@ /* and pci_do_scan_bus */ #include "rpaphp.h" -int rpaphp_debug; +bool rpaphp_debug; LIST_HEAD(rpaphp_slot_head); #define DRIVER_VERSION "0.1" diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index e0c90e643b5..ca64932e658 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -43,9 +43,9 @@ #define MY_NAME THIS_MODULE->name #endif -extern int shpchp_poll_mode; +extern bool shpchp_poll_mode; extern int shpchp_poll_time; -extern int shpchp_debug; +extern bool shpchp_debug; extern struct workqueue_struct *shpchp_wq; extern struct workqueue_struct *shpchp_ordered_wq; diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c index dd7e0c51a33..7414fd9ad1d 100644 --- a/drivers/pci/hotplug/shpchp_core.c +++ b/drivers/pci/hotplug/shpchp_core.c @@ -36,8 +36,8 @@ #include "shpchp.h" /* Global variables */ -int shpchp_debug; -int shpchp_poll_mode; +bool shpchp_debug; +bool shpchp_poll_mode; int shpchp_poll_time; struct workqueue_struct *shpchp_wq; struct workqueue_struct *shpchp_ordered_wq; diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c index 95489cd9a55..52229863e9f 100644 --- a/drivers/pci/pcie/aer/aer_inject.c +++ b/drivers/pci/pcie/aer/aer_inject.c @@ -28,7 +28,7 @@ #include "aerdrv.h" /* Override the existing corrected and uncorrected error masks */ -static int aer_mask_override; +static bool aer_mask_override; module_param(aer_mask_override, bool, 0); struct aer_error_inj { diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 9674e9f30d4..0ca05353814 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -27,8 +27,8 @@ #include #include "aerdrv.h" -static int forceload; -static int nosourceid; +static bool forceload; +static bool nosourceid; module_param(forceload, bool, 0); module_param(nosourceid, bool, 0); diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index 9dc565c615b..849c0c11d2a 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -24,15 +24,15 @@ #include "yenta_socket.h" #include "i82365.h" -static int disable_clkrun; +static bool disable_clkrun; module_param(disable_clkrun, bool, 0444); MODULE_PARM_DESC(disable_clkrun, "If PC card doesn't function properly, please try this option"); -static int isa_probe = 1; +static bool isa_probe = 1; module_param(isa_probe, bool, 0444); MODULE_PARM_DESC(isa_probe, "If set ISA interrupts are probed (default). Set to N to disable probing"); -static int pwr_irqs_off; +static bool pwr_irqs_off; module_param(pwr_irqs_off, bool, 0644); MODULE_PARM_DESC(pwr_irqs_off, "Force IRQs off during power-on of slot. 
Use only when seeing IRQ storms!"); diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index 8877b836d27..d9673447832 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -189,7 +189,7 @@ struct compal_data{ /* =============== */ /* General globals */ /* =============== */ -static int force; +static bool force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Force driver load, ignore DMI data"); diff --git a/drivers/platform/x86/intel_oaktrail.c b/drivers/platform/x86/intel_oaktrail.c index 7f88c7923fc..6ee0b5c9093 100644 --- a/drivers/platform/x86/intel_oaktrail.c +++ b/drivers/platform/x86/intel_oaktrail.c @@ -95,7 +95,7 @@ #define OT_EC_BL_CONTROL_ON_DATA 0x1A -static int force; +static bool force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Force driver load, ignore DMI data"); diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index f204643c505..bb5132128b3 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -89,7 +89,7 @@ static int msi_laptop_resume(struct platform_device *device); #define MSI_STANDARD_EC_DEVICES_EXISTS_ADDRESS 0x2f -static int force; +static bool force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Force driver load, ignore DMI data"); diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c index 09e26bfd464..fd73ea89b85 100644 --- a/drivers/platform/x86/samsung-laptop.c +++ b/drivers/platform/x86/samsung-laptop.c @@ -228,12 +228,12 @@ static struct platform_device *sdev; static struct rfkill *rfk; static bool has_stepping_quirk; -static int force; +static bool force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Disable the DMI check and forces the driver to be loaded"); -static int debug; +static bool debug; module_param(debug, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(debug, "Debug enabled or not"); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 62533c105da..ea0c6075b72 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -378,13 +378,13 @@ static unsigned int bright_maxlvl; /* 0 = unknown */ #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES static int dbg_wlswemul; -static int tpacpi_wlsw_emulstate; +static bool tpacpi_wlsw_emulstate; static int dbg_bluetoothemul; -static int tpacpi_bluetooth_emulstate; +static bool tpacpi_bluetooth_emulstate; static int dbg_wwanemul; -static int tpacpi_wwan_emulstate; +static bool tpacpi_wwan_emulstate; static int dbg_uwbemul; -static int tpacpi_uwb_emulstate; +static bool tpacpi_uwb_emulstate; #endif @@ -6444,7 +6444,7 @@ static struct ibm_struct brightness_driver_data = { static int alsa_index = ~((1 << (SNDRV_CARDS - 3)) - 1); /* last three slots */ static char *alsa_id = "ThinkPadEC"; -static int alsa_enable = SNDRV_DEFAULT_ENABLE1; +static bool alsa_enable = SNDRV_DEFAULT_ENABLE1; struct tpacpi_alsa_data { struct snd_card *card; @@ -6487,7 +6487,7 @@ static enum tpacpi_volume_access_mode volume_mode = TPACPI_VOL_MODE_MAX; static enum tpacpi_volume_capabilities volume_capabilities; -static int volume_control_allowed; +static bool volume_control_allowed; /* * Used to syncronize writers to TP_EC_AUDIO and @@ -7265,7 +7265,7 @@ enum fan_control_commands { * and also watchdog cmd */ }; -static int fan_control_allowed; +static bool fan_control_allowed; static enum fan_status_access_mode fan_status_access_mode; static enum 
fan_control_access_mode fan_control_access_mode; @@ -8437,7 +8437,7 @@ static struct proc_dir_entry *proc_dir; * Module and infrastructure proble, init and exit handling */ -static int force_load; +static bool force_load; #ifdef CONFIG_THINKPAD_ACPI_DEBUG static const char * __init str_supported(int is_supported) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index a134c26870b..42a4dcc25f9 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -82,12 +82,12 @@ struct wmi_block { #define ACPI_WMI_STRING 0x4 /* GUID takes & returns a string */ #define ACPI_WMI_EVENT 0x8 /* GUID is an event */ -static int debug_event; +static bool debug_event; module_param(debug_event, bool, 0444); MODULE_PARM_DESC(debug_event, "Log WMI Events [0/1]"); -static int debug_dump_wdg; +static bool debug_dump_wdg; module_param(debug_dump_wdg, bool, 0444); MODULE_PARM_DESC(debug_dump_wdg, "Dump available WMI interfaces [0/1]"); diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c index 545874b1df9..076e211a40b 100644 --- a/drivers/power/ds2760_battery.c +++ b/drivers/power/ds2760_battery.c @@ -64,7 +64,7 @@ static unsigned int cache_time = 1000; module_param(cache_time, uint, 0644); MODULE_PARM_DESC(cache_time, "cache time in milliseconds"); -static unsigned int pmod_enabled; +static bool pmod_enabled; module_param(pmod_enabled, bool, 0644); MODULE_PARM_DESC(pmod_enabled, "PMOD enable bit"); diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index e5cb9248a44..f3b8bb84faf 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -75,7 +75,7 @@ static LIST_HEAD(raw3270_devices); static int raw3270_registered; /* Module parameters */ -static int tubxcorrect = 0; +static bool tubxcorrect = 0; module_param(tubxcorrect, bool, 0); /* diff --git a/drivers/s390/char/vmwatchdog.c b/drivers/s390/char/vmwatchdog.c index 11312f401c7..2211277a107 100644 --- a/drivers/s390/char/vmwatchdog.c +++ b/drivers/s390/char/vmwatchdog.c @@ -28,9 +28,9 @@ #define MAX_CMDLEN 240 #define MIN_INTERVAL 15 static char vmwdt_cmd[MAX_CMDLEN] = "IPL"; -static int vmwdt_conceal; +static bool vmwdt_conceal; -static int vmwdt_nowayout = WATCHDOG_NOWAYOUT; +static bool vmwdt_nowayout = WATCHDOG_NOWAYOUT; MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnd Bergmann "); diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c index 195823a51aa..ed119cedaae 100644 --- a/drivers/scsi/aha1542.c +++ b/drivers/scsi/aha1542.c @@ -102,7 +102,7 @@ static int setup_dmaspeed[MAXBOARDS] __initdata = { -1, -1, -1, -1 }; */ #if defined(MODULE) -static int isapnp = 0; +static bool isapnp = 0; static int aha1542[] = {0x330, 11, 4, -1}; module_param_array(aha1542, int, NULL, 0); module_param(isapnp, bool, 0); diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index f5b718d3c31..13aeca3d51f 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -546,7 +546,7 @@ static struct ParameterData __devinitdata cfg_data[] = { * command line overrides will be used. If set to 1 then safe and * slow settings will be used. */ -static int use_safe_settings = 0; +static bool use_safe_settings = 0; module_param_named(safe, use_safe_settings, bool, 0); MODULE_PARM_DESC(safe, "Use safe and slow settings only. 
Default: false"); diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c index f6a50c98c36..002924963cd 100644 --- a/drivers/scsi/nsp32.c +++ b/drivers/scsi/nsp32.c @@ -59,11 +59,11 @@ MODULE_PARM_DESC(trans_mode, "transfer mode (0: BIOS(default) 1: Async 2: Ultra2 #define ASYNC_MODE 1 #define ULTRA20M_MODE 2 -static int auto_param = 0; /* default: ON */ +static bool auto_param = 0; /* default: ON */ module_param (auto_param, bool, 0); MODULE_PARM_DESC(auto_param, "AutoParameter mode (0: ON(default) 1: OFF)"); -static int disc_priv = 1; /* default: OFF */ +static bool disc_priv = 1; /* default: OFF */ module_param (disc_priv, bool, 0); MODULE_PARM_DESC(disc_priv, "disconnection privilege mode (0: ON 1: OFF(default))"); diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index ca86721a71b..b61a753eb89 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -70,7 +70,7 @@ module_param(nsp_burst_mode, int, 0); MODULE_PARM_DESC(nsp_burst_mode, "Burst transfer mode (0=io8, 1=io32, 2=mem32(default))"); /* Release IO ports after configuration? */ -static int free_ports = 0; +static bool free_ports = 0; module_param(free_ports, bool, 0); MODULE_PARM_DESC(free_ports, "Release IO ports after configuration? (default: 0 (=no))"); diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 0d18d80bcd2..9bcf87ae4c0 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -61,7 +61,7 @@ EXPORT_SYMBOL(comedi_debug); module_param(comedi_debug, int, 0644); #endif -int comedi_autoconfig = 1; +bool comedi_autoconfig = 1; module_param(comedi_autoconfig, bool, 0444); static int comedi_num_legacy_minors; diff --git a/drivers/staging/comedi/comedi_fops.h b/drivers/staging/comedi/comedi_fops.h index da4b4f5553f..006cf14c577 100644 --- a/drivers/staging/comedi/comedi_fops.h +++ b/drivers/staging/comedi/comedi_fops.h @@ -1,10 +1,11 @@ #ifndef _COMEDI_FOPS_H #define _COMEDI_FOPS_H +#include extern struct class *comedi_class; extern const struct file_operations comedi_fops; -extern int comedi_autoconfig; +extern bool comedi_autoconfig; extern struct comedi_driver *comedi_drivers; #endif /* _COMEDI_FOPS_H */ diff --git a/drivers/staging/media/go7007/snd-go7007.c b/drivers/staging/media/go7007/snd-go7007.c index deac938d850..d071c838ac2 100644 --- a/drivers/staging/media/go7007/snd-go7007.c +++ b/drivers/staging/media/go7007/snd-go7007.c @@ -38,7 +38,7 @@ static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; -static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; +static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; module_param_array(index, int, NULL, 0444); module_param_array(id, charp, NULL, 0444); diff --git a/drivers/staging/media/lirc/lirc_bt829.c b/drivers/staging/media/lirc/lirc_bt829.c index c5a0d27a02d..4d20e9f7411 100644 --- a/drivers/staging/media/lirc/lirc_bt829.c +++ b/drivers/staging/media/lirc/lirc_bt829.c @@ -53,7 +53,7 @@ static unsigned char do_get_bits(void); #define DRIVER_NAME "lirc_bt829" -static int debug; +static bool debug; #define dprintk(fmt, args...) 
\ do { \ if (debug) \ diff --git a/drivers/staging/media/lirc/lirc_igorplugusb.c b/drivers/staging/media/lirc/lirc_igorplugusb.c index 6cd4cd67a1d..7a250177667 100644 --- a/drivers/staging/media/lirc/lirc_igorplugusb.c +++ b/drivers/staging/media/lirc/lirc_igorplugusb.c @@ -62,9 +62,9 @@ /* debugging support */ #ifdef CONFIG_USB_DEBUG -static int debug = 1; +static bool debug = 1; #else -static int debug; +static bool debug; #endif #define dprintk(fmt, args...) \ diff --git a/drivers/staging/media/lirc/lirc_parallel.c b/drivers/staging/media/lirc/lirc_parallel.c index 02b07a6c177..dd2bca7b56f 100644 --- a/drivers/staging/media/lirc/lirc_parallel.c +++ b/drivers/staging/media/lirc/lirc_parallel.c @@ -63,8 +63,8 @@ /*** Global Variables ***/ -static int debug; -static int check_pselecd; +static bool debug; +static bool check_pselecd; unsigned int irq = LIRC_IRQ; unsigned int io = LIRC_PORT; diff --git a/drivers/staging/media/lirc/lirc_serial.c b/drivers/staging/media/lirc/lirc_serial.c index 8a060a8a722..2aac67c9828 100644 --- a/drivers/staging/media/lirc/lirc_serial.c +++ b/drivers/staging/media/lirc/lirc_serial.c @@ -107,13 +107,13 @@ struct lirc_serial { static int type; static int io; static int irq; -static int iommap; +static bool iommap; static int ioshift; -static int softcarrier = 1; -static int share_irq; -static int debug; +static bool softcarrier = 1; +static bool share_irq; +static bool debug; static int sense = -1; /* -1 = auto, 0 = active high, 1 = active low */ -static int txsense; /* 0 = active high, 1 = active low */ +static bool txsense; /* 0 = active high, 1 = active low */ #define dprintk(fmt, args...) \ do { \ diff --git a/drivers/staging/media/lirc/lirc_sir.c b/drivers/staging/media/lirc/lirc_sir.c index 6903d3992ec..c94382b917a 100644 --- a/drivers/staging/media/lirc/lirc_sir.c +++ b/drivers/staging/media/lirc/lirc_sir.c @@ -173,7 +173,7 @@ static DEFINE_SPINLOCK(hardware_lock); static int rx_buf[RBUF_LEN]; static unsigned int rx_tail, rx_head; -static int debug; +static bool debug; #define dprintk(fmt, args...) \ do { \ if (debug) \ diff --git a/drivers/staging/media/lirc/lirc_zilog.c b/drivers/staging/media/lirc/lirc_zilog.c index 0302d82a12f..76ea4a8f2c7 100644 --- a/drivers/staging/media/lirc/lirc_zilog.c +++ b/drivers/staging/media/lirc/lirc_zilog.c @@ -155,8 +155,8 @@ static struct mutex tx_data_lock; #define zilog_info(s, args...) printk(KERN_INFO KBUILD_MODNAME ": " s, ## args) /* module parameters */ -static int debug; /* debug output */ -static int tx_only; /* only handle the IR Tx function */ +static bool debug; /* debug output */ +static bool tx_only; /* only handle the IR Tx function */ static int minor = -1; /* minor number */ #define dprintk(fmt, args...) 
\ diff --git a/drivers/staging/quatech_usb2/quatech_usb2.c b/drivers/staging/quatech_usb2/quatech_usb2.c index 02fafecd477..897a3a99c79 100644 --- a/drivers/staging/quatech_usb2/quatech_usb2.c +++ b/drivers/staging/quatech_usb2/quatech_usb2.c @@ -16,7 +16,7 @@ #include #include -static int debug; +static bool debug; /* Version Information */ #define DRIVER_VERSION "v2.00" diff --git a/drivers/staging/serqt_usb2/serqt_usb2.c b/drivers/staging/serqt_usb2/serqt_usb2.c index c44e41af288..1c5780f1571 100644 --- a/drivers/staging/serqt_usb2/serqt_usb2.c +++ b/drivers/staging/serqt_usb2/serqt_usb2.c @@ -16,7 +16,7 @@ #include #include -static int debug; +static bool debug; /* Version Information */ #define DRIVER_VERSION "v2.14" diff --git a/drivers/staging/speakup/speakup.h b/drivers/staging/speakup/speakup.h index 412b87947f6..e66579e6147 100644 --- a/drivers/staging/speakup/speakup.h +++ b/drivers/staging/speakup/speakup.h @@ -116,7 +116,7 @@ extern int bleep_time, bell_pos; extern int spell_delay, key_echo; extern short punc_mask; extern short pitch_shift, synth_flags; -extern int quiet_boot; +extern bool quiet_boot; extern char *synth_name; extern struct bleep unprocessed_sound; diff --git a/drivers/staging/speakup/synth.c b/drivers/staging/speakup/synth.c index c241074a4b5..2222d6919ef 100644 --- a/drivers/staging/speakup/synth.c +++ b/drivers/staging/speakup/synth.c @@ -22,7 +22,7 @@ static struct spk_synth *synths[MAXSYNTHS]; struct spk_synth *synth; char pitch_buff[32] = ""; static int module_status; -int quiet_boot; +bool quiet_boot; struct speakup_info_t speakup_info = { .spinlock = __SPIN_LOCK_UNLOCKED(speakup_info.spinlock), diff --git a/drivers/staging/vme/bridges/vme_tsi148.c b/drivers/staging/vme/bridges/vme_tsi148.c index 08a449b4abf..f50582169b2 100644 --- a/drivers/staging/vme/bridges/vme_tsi148.c +++ b/drivers/staging/vme/bridges/vme_tsi148.c @@ -41,7 +41,7 @@ static void __exit tsi148_exit(void); /* Module parameter */ -static int err_chk; +static bool err_chk; static int geoid; static const char driver_name[] = "vme_tsi148"; diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c index 6a1241c7f84..de88aa5566e 100644 --- a/drivers/tty/rocket.c +++ b/drivers/tty/rocket.c @@ -118,7 +118,7 @@ static unsigned long board2; static unsigned long board3; static unsigned long board4; static unsigned long controller; -static int support_low_speed; +static bool support_low_speed; static unsigned long modem1; static unsigned long modem2; static unsigned long modem3; diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c index e67fb20490d..ff8017f8791 100644 --- a/drivers/tty/synclink.c +++ b/drivers/tty/synclink.c @@ -850,7 +850,7 @@ static int mgsl_device_count; * .text section address and breakpoint on module load. * This is useful for use with gdb and add-symbol-file command. */ -static int break_on_load; +static bool break_on_load; /* * Driver major number, defaults to zero to get auto diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index 0f6b796c95c..a7efe538df0 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -456,7 +456,7 @@ static int synclinkmp_device_count = 0; * .text section address and breakpoint on module load. * This is useful for use with gdb and add-symbol-file command. 
*/ -static int break_on_load = 0; +static bool break_on_load = 0; /* * Driver major number, defaults to zero to get auto diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c index b42092e1f16..98dd9e49b68 100644 --- a/drivers/usb/atm/speedtch.c +++ b/drivers/usb/atm/speedtch.c @@ -73,9 +73,9 @@ static const char speedtch_driver_name[] = "speedtch"; #define DEFAULT_SW_BUFFERING 0 static unsigned int altsetting = 0; /* zero means: use the default */ -static int dl_512_first = DEFAULT_DL_512_FIRST; -static int enable_isoc = DEFAULT_ENABLE_ISOC; -static int sw_buffering = DEFAULT_SW_BUFFERING; +static bool dl_512_first = DEFAULT_DL_512_FIRST; +static bool enable_isoc = DEFAULT_ENABLE_ISOC; +static bool sw_buffering = DEFAULT_SW_BUFFERING; #define DEFAULT_B_MAX_DSL 8128 #define DEFAULT_MODEM_MODE 11 diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index 00f171a7a8a..01ea5d7421d 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -542,7 +542,7 @@ static int modem_index; static unsigned int debug; static unsigned int altsetting[NB_MODEM] = { [0 ... (NB_MODEM - 1)] = FASTEST_ISO_INTF}; -static int sync_wait[NB_MODEM]; +static bool sync_wait[NB_MODEM]; static char *cmv_file[NB_MODEM]; static int annex[NB_MODEM]; diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 3af5e2dd1d8..8df4b76465a 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -93,7 +93,7 @@ struct async { u8 bulk_status; }; -static int usbfs_snoop; +static bool usbfs_snoop; module_param(usbfs_snoop, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(usbfs_snoop, "true to log all usbfs traffic"); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 79d339e2e70..a0613d8f9be 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -102,7 +102,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khubd_wait); static struct task_struct *khubd_task; /* cycle leds on hubs that aren't blinking for attention */ -static int blinkenlights = 0; +static bool blinkenlights = 0; module_param (blinkenlights, bool, S_IRUGO); MODULE_PARM_DESC (blinkenlights, "true to cycle leds on hubs"); @@ -131,12 +131,12 @@ MODULE_PARM_DESC(initial_descriptor_timeout, * otherwise the new scheme is used. If that fails and "use_both_schemes" * is set, then the driver will make another attempt, using the other scheme. 
*/ -static int old_scheme_first = 0; +static bool old_scheme_first = 0; module_param(old_scheme_first, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(old_scheme_first, "start with the old device initialization scheme"); -static int use_both_schemes = 1; +static bool use_both_schemes = 1; module_param(use_both_schemes, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(use_both_schemes, "try the other device initialization scheme if the " @@ -2026,7 +2026,7 @@ static unsigned hub_is_wusb(struct usb_hub *hub) #define SET_ADDRESS_TRIES 2 #define GET_DESCRIPTOR_TRIES 2 #define SET_CONFIG_TRIES (2 * (use_both_schemes + 1)) -#define USE_NEW_SCHEME(i) ((i) / 2 == old_scheme_first) +#define USE_NEW_SCHEME(i) ((i) / 2 == (int)old_scheme_first) #define HUB_ROOT_RESET_TIME 50 /* times are in msec */ #define HUB_SHORT_RESET_TIME 10 diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 1382c90d083..8ca9f994a28 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -47,7 +47,7 @@ const char *usbcore_name = "usbcore"; -static int nousb; /* Disable USB when built into kernel image */ +static bool nousb; /* Disable USB when built into kernel image */ #ifdef CONFIG_USB_SUSPEND static int usb_autosuspend_delay = 2; /* Default delay value, diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c index e9a2c5c4445..c16ff55a74e 100644 --- a/drivers/usb/gadget/amd5536udc.c +++ b/drivers/usb/gadget/amd5536udc.c @@ -152,15 +152,15 @@ static const char *ep_string[] = { }; /* DMA usage flag */ -static int use_dma = 1; +static bool use_dma = 1; /* packet per buffer dma */ -static int use_dma_ppb = 1; +static bool use_dma_ppb = 1; /* with per descr. update */ -static int use_dma_ppb_du; +static bool use_dma_ppb_du; /* buffer fill mode */ static int use_dma_bufferfill_mode; /* full speed only mode */ -static int use_fullspeed; +static bool use_fullspeed; /* tx buffer size for high speed */ static unsigned long hs_tx_buf = UDC_EPIN_BUFF_SIZE; diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c index 0cd764d5935..a28f6ffcd0f 100644 --- a/drivers/usb/gadget/ether.c +++ b/drivers/usb/gadget/ether.c @@ -250,9 +250,9 @@ static struct usb_configuration rndis_config_driver = { /*-------------------------------------------------------------------------*/ #ifdef CONFIG_USB_ETH_EEM -static int use_eem = 1; +static bool use_eem = 1; #else -static int use_eem; +static bool use_eem; #endif module_param(use_eem, bool, 0); MODULE_PARM_DESC(use_eem, "use CDC EEM mode"); diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index e0f30fc70e4..47766f0e7ca 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -303,16 +303,16 @@ MODULE_LICENSE("Dual BSD/GPL"); static struct { char *file[FSG_MAX_LUNS]; char *serial; - int ro[FSG_MAX_LUNS]; - int nofua[FSG_MAX_LUNS]; + bool ro[FSG_MAX_LUNS]; + bool nofua[FSG_MAX_LUNS]; unsigned int num_filenames; unsigned int num_ros; unsigned int num_nofuas; unsigned int nluns; - int removable; - int can_stall; - int cdrom; + bool removable; + bool can_stall; + bool cdrom; char *transport_parm; char *protocol_parm; diff --git a/drivers/usb/gadget/net2272.c b/drivers/usb/gadget/net2272.c index 4c81d540bc2..7322d293213 100644 --- a/drivers/usb/gadget/net2272.c +++ b/drivers/usb/gadget/net2272.c @@ -69,7 +69,7 @@ static const char * const ep_name[] = { * * If use_dma is disabled, pio will be used instead. 
*/ -static int use_dma = 0; +static bool use_dma = 0; module_param(use_dma, bool, 0644); /* diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index cf1f36454d0..cdedd133674 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -90,8 +90,8 @@ static const char *const ep_name [] = { * Some gadget drivers work better with the dma support here than others. * These two parameters let you use PIO or more aggressive DMA. */ -static int use_dma = 1; -static int use_dma_chaining = 0; +static bool use_dma = 1; +static bool use_dma_chaining = 0; /* "modprobe net2280 use_dma=n" etc */ module_param (use_dma, bool, S_IRUGO); @@ -112,7 +112,7 @@ module_param (fifo_mode, ushort, 0644); * USB suspend requests will be ignored. This is acceptable for * self-powered devices */ -static int enable_suspend = 0; +static bool enable_suspend = 0; /* "modprobe net2280 enable_suspend=1" etc */ module_param (enable_suspend, bool, S_IRUGO); diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index 7db5bbe6251..576cd8578b4 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -98,7 +98,7 @@ module_param (fifo_mode, uint, 0); MODULE_PARM_DESC (fifo_mode, "endpoint configuration"); #ifdef USE_DMA -static unsigned use_dma = 1; +static bool use_dma = 1; /* "modprobe omap_udc use_dma=y", or else as a kernel * boot parameter "omap_udc:use_dma=y" diff --git a/drivers/usb/gadget/pch_udc.c b/drivers/usb/gadget/pch_udc.c index dd2313cce1d..a3fcaae4bc2 100644 --- a/drivers/usb/gadget/pch_udc.c +++ b/drivers/usb/gadget/pch_udc.c @@ -359,7 +359,7 @@ struct pch_udc_dev { static const char ep0_string[] = "ep0in"; static DEFINE_SPINLOCK(udc_stall_spinlock); /* stall spin lock */ struct pch_udc_dev *pch_udc; /* pointer to device object */ -static int speed_fs; +static bool speed_fs; module_param_named(speed_fs, speed_fs, bool, S_IRUGO); MODULE_PARM_DESC(speed_fs, "true for Full speed operation"); diff --git a/drivers/usb/gadget/serial.c b/drivers/usb/gadget/serial.c index ed1b816e58d..ad9e5b2df64 100644 --- a/drivers/usb/gadget/serial.c +++ b/drivers/usb/gadget/serial.c @@ -123,11 +123,11 @@ MODULE_AUTHOR("Al Borchers"); MODULE_AUTHOR("David Brownell"); MODULE_LICENSE("GPL"); -static int use_acm = true; +static bool use_acm = true; module_param(use_acm, bool, 0); MODULE_PARM_DESC(use_acm, "Use CDC ACM, default=yes"); -static int use_obex = false; +static bool use_obex = false; module_param(use_obex, bool, 0); MODULE_PARM_DESC(use_obex, "Use CDC OBEX, default=no"); diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c index 20697cc132d..31d34832907 100644 --- a/drivers/usb/gadget/zero.c +++ b/drivers/usb/gadget/zero.c @@ -81,7 +81,7 @@ module_param(buflen, uint, 0); * work better with hosts where config changes are problematic or * controllers (like original superh) that only support one config. 
*/ -static int loopdefault = 0; +static bool loopdefault = 0; module_param(loopdefault, bool, S_IRUGO|S_IWUSR); /*-------------------------------------------------------------------------*/ diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index e311a511529..a007a9fe0f8 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -112,7 +112,7 @@ module_param (park, uint, S_IRUGO); MODULE_PARM_DESC (park, "park setting; 1-3 back-to-back async packets"); /* for flakey hardware, ignore overcurrent indicators */ -static int ignore_oc = 0; +static bool ignore_oc = 0; module_param (ignore_oc, bool, S_IRUGO); MODULE_PARM_DESC (ignore_oc, "ignore bogus hardware overcurrent indications"); diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 5f5a6324143..34b9edd8665 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -115,13 +115,13 @@ static inline void sb800_prefetch(struct ohci_hcd *ohci, int on) /* Some boards misreport power switching/overcurrent */ -static int distrust_firmware = 1; +static bool distrust_firmware = 1; module_param (distrust_firmware, bool, 0); MODULE_PARM_DESC (distrust_firmware, "true to distrust firmware power/overcurrent setup"); /* Some boards leave IR set wrongly, since they fail BIOS/SMM handshakes */ -static int no_handshake = 0; +static bool no_handshake = 0; module_param (no_handshake, bool, 0); MODULE_PARM_DESC (no_handshake, "true (not default) disables BIOS handshake"); diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c index 6f62de5c6e3..015c7c62ed4 100644 --- a/drivers/usb/host/oxu210hp-hcd.c +++ b/drivers/usb/host/oxu210hp-hcd.c @@ -233,7 +233,7 @@ module_param(park, uint, S_IRUGO); MODULE_PARM_DESC(park, "park setting; 1-3 back-to-back async packets"); /* For flakey hardware, ignore overcurrent indicators */ -static int ignore_oc; +static bool ignore_oc; module_param(ignore_oc, bool, S_IRUGO); MODULE_PARM_DESC(ignore_oc, "ignore bogus hardware overcurrent indications"); diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c index 533d12cca37..16dd6a6abf0 100644 --- a/drivers/usb/host/u132-hcd.c +++ b/drivers/usb/host/u132-hcd.c @@ -74,7 +74,7 @@ MODULE_LICENSE("GPL"); #define INT_MODULE_PARM(n, v) static int n = v;module_param(n, int, 0444) INT_MODULE_PARM(testing, 0); /* Some boards misreport power switching/overcurrent*/ -static int distrust_firmware = 1; +static bool distrust_firmware = 1; module_param(distrust_firmware, bool, 0); MODULE_PARM_DESC(distrust_firmware, "true to distrust firmware power/overcurren" "t setup"); diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index c8ae199cfbb..6b5eb1017e2 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -59,7 +59,7 @@ #define DRIVER_DESC "USB Universal Host Controller Interface driver" /* for flakey hardware, ignore overcurrent indicators */ -static int ignore_oc; +static bool ignore_oc; module_param(ignore_oc, bool, S_IRUGO); MODULE_PARM_DESC(ignore_oc, "ignore hardware overcurrent indications"); diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c index 2dbe600fbc1..a4a3c7cd4a1 100644 --- a/drivers/usb/misc/ftdi-elan.c +++ b/drivers/usb/misc/ftdi-elan.c @@ -53,7 +53,7 @@ MODULE_AUTHOR("Tony Olech"); MODULE_DESCRIPTION("FTDI ELAN driver"); MODULE_LICENSE("GPL"); #define INT_MODULE_PARM(n, v) static int n = v;module_param(n, int, 0444) -static int distrust_firmware = 1; +static bool distrust_firmware = 1; 
+static bool distrust_firmware = 1;
module_param(distrust_firmware, bool, 0); MODULE_PARM_DESC(distrust_firmware, "true to distrust firmware power/overcurren" "t setup"); diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 2453a39b479..4fd0dc835ae 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -62,7 +62,7 @@ MODULE_LICENSE("GPL"); /* Module parameters */ static DEFINE_MUTEX(iowarrior_mutex); -static int debug = 0; +static bool debug = 0; module_param(debug, bool, 0644); MODULE_PARM_DESC(debug, "debug=1 enables debugging messages"); diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index 53be7aef630..66bc376005d 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -750,7 +750,7 @@ cppi_next_tx_segment(struct musb *musb, struct cppi_channel *tx) * So this module parameter lets the heuristic be disabled. When using * gadgetfs, the heuristic will probably need to be disabled. */ -static int cppi_rx_rndis = 1; +static bool cppi_rx_rndis = 1; module_param(cppi_rx_rndis, bool, 0); MODULE_PARM_DESC(cppi_rx_rndis, "enable/disable RX RNDIS heuristic"); diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index f6ff7923048..56cf0243979 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1586,7 +1586,7 @@ irqreturn_t musb_interrupt(struct musb *musb) EXPORT_SYMBOL_GPL(musb_interrupt); #ifndef CONFIG_MUSB_PIO_ONLY -static int __initdata use_dma = 1; +static bool __initdata use_dma = 1; /* "modprobe ... use_dma=0" etc */ module_param(use_dma, bool, 0); diff --git a/drivers/usb/serial/aircable.c b/drivers/usb/serial/aircable.c index b43d07df4c4..123bf915533 100644 --- a/drivers/usb/serial/aircable.c +++ b/drivers/usb/serial/aircable.c @@ -52,7 +52,7 @@ #include #include -static int debug; +static bool debug; /* Vendor and Product ID */ #define AIRCABLE_VID 0x16CA diff --git a/drivers/usb/serial/ark3116.c b/drivers/usb/serial/ark3116.c index 18e875b92e0..69328dcfd91 100644 --- a/drivers/usb/serial/ark3116.c +++ b/drivers/usb/serial/ark3116.c @@ -37,7 +37,7 @@ #include #include -static int debug; +static bool debug; /* * Version information */ diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c index f9f29b289f2..29ffeb6279c 100644 --- a/drivers/usb/serial/belkin_sa.c +++ b/drivers/usb/serial/belkin_sa.c @@ -37,7 +37,7 @@ #include #include "belkin_sa.h" -static int debug; +static bool debug; /* * Version Information diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 0e77511060c..5e53cc59e65 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -70,7 +70,7 @@ #define CH341_NBREAK_BITS_REG2 0x40 -static int debug; +static bool debug; static const struct usb_device_id id_table[] = { { USB_DEVICE(0x4348, 0x5523) }, diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index adfe660ed00..fba1147ed91 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -49,7 +49,7 @@ static void cp210x_break_ctl(struct tty_struct *, int); static int cp210x_startup(struct usb_serial *); static void cp210x_dtr_rts(struct usb_serial_port *p, int on); -static int debug; +static bool debug; static const struct usb_device_id id_table[] = { { USB_DEVICE(0x045B, 0x0053) }, /* Renesas RX610 RX-Stick */ diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index 98bf8334983..6bc3802a581 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -43,7 +43,7 @@ 
#define CYBERJACK_LOCAL_BUF_SIZE 32 -static int debug; +static bool debug; /* * Version Information diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index 07680d6b792..3bdeafa29c2 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -46,10 +46,10 @@ #include "cypress_m8.h" -static int debug; -static int stats; +static bool debug; +static bool stats; static int interval; -static int unstable_bauds; +static bool unstable_bauds; /* * Version Information diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 6d26a77d0f2..b23bebd721a 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -251,7 +251,7 @@ static int digi_read_oob_callback(struct urb *urb); /* Statics */ -static int debug; +static bool debug; static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(DIGI_VENDOR_ID, DIGI_2_ID) }, diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c index 504b5585ea4..aced6817bf9 100644 --- a/drivers/usb/serial/empeg.c +++ b/drivers/usb/serial/empeg.c @@ -28,7 +28,7 @@ #include #include -static int debug; +static bool debug; /* * Version Information diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index c290df97108..01b6404df39 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -55,7 +55,7 @@ #define DRIVER_AUTHOR "Greg Kroah-Hartman , Bill Ryder , Kuba Ober , Andreas Mohr, Johan Hovold " #define DRIVER_DESC "USB FTDI Serial Converters Driver" -static int debug; +static bool debug; static __u16 vendor = FTDI_VID; static __u16 product; diff --git a/drivers/usb/serial/funsoft.c b/drivers/usb/serial/funsoft.c index e21ce9ddfc6..5d4b099dcf8 100644 --- a/drivers/usb/serial/funsoft.c +++ b/drivers/usb/serial/funsoft.c @@ -16,7 +16,7 @@ #include #include -static int debug; +static bool debug; static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1404, 0xcddc) }, diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index bf12565f8e8..21343378c32 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -42,7 +42,7 @@ static int initial_mode = 1; /* debug flag */ -static int debug; +static bool debug; #define GARMIN_VENDOR_ID 0x091E diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index abd2ee2b2f9..0497575e479 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -191,7 +191,7 @@ static const struct divisor_table_entry divisor_table[] = { }; /* local variables */ -static int debug; +static bool debug; static atomic_t CmdUrbs; /* Number of outstanding Command Write Urbs */ diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index e44d375edaa..65bf06aa591 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -210,10 +210,10 @@ static unsigned char OperationalMajorVersion; static unsigned char OperationalMinorVersion; static unsigned short OperationalBuildNumber; -static int debug; +static bool debug; static int closing_wait = EDGE_CLOSING_WAIT; -static int ignore_cpu_rev; +static bool ignore_cpu_rev; static int default_uart_mode; /* RS232 */ static void edge_tty_recv(struct device *dev, struct tty_struct *tty, diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c index 36f5cbe9048..06053a920dd 100644 --- a/drivers/usb/serial/ipaq.c +++ b/drivers/usb/serial/ipaq.c @@ -34,7 +34,7 @@ #define DRIVER_DESC 
"USB PocketPC PDA driver" static __u16 product, vendor; -static int debug; +static bool debug; static int connect_retries = KP_RETRIES; static int initial_wait; diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c index 5170799d6e9..6f9356f3f99 100644 --- a/drivers/usb/serial/ipw.c +++ b/drivers/usb/serial/ipw.c @@ -147,7 +147,7 @@ static struct usb_driver usb_ipw_driver = { .no_dynamic_id = 1, }; -static int debug; +static bool debug; static int ipw_open(struct tty_struct *tty, struct usb_serial_port *port) { diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index 0c537da0d3c..84a396e8367 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -45,7 +45,7 @@ #define DRIVER_AUTHOR "Greg Kroah-Hartman , Johan Hovold " #define DRIVER_DESC "USB IR Dongle driver" -static int debug; +static bool debug; /* if overridden by the user, then use their value for the size of the read and * write urbs */ diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index 64d0ffd4440..3077a443697 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -34,9 +34,9 @@ #ifdef CONFIG_USB_SERIAL_DEBUG -static int debug = 1; +static bool debug = 1; #else -static int debug; +static bool debug; #endif /* @@ -65,7 +65,7 @@ static int clockmode = 1; static int cdmode = 1; static int iuu_cardin; static int iuu_cardout; -static int xmas; +static bool xmas; static int vcc_default = 5; static void read_rxcmd_callback(struct urb *urb); diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index bc8dc203e81..4cc36c76180 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -45,7 +45,7 @@ #include #include "keyspan.h" -static int debug; +static bool debug; /* * Version Information diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index a40615674a6..7c62a704830 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -31,7 +31,7 @@ #include #include -static int debug; +static bool debug; /* make a simple define to handle if we are compiling keyspan_pda or xircom support */ #if defined(CONFIG_USB_SERIAL_KEYSPAN_PDA) || defined(CONFIG_USB_SERIAL_KEYSPAN_PDA_MODULE) diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index 19373cb7c5b..fc064e1442c 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -49,7 +49,7 @@ #include #include "kl5kusb105.h" -static int debug; +static bool debug; /* * Version Information diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index a975bb80303..27fa9c8a77b 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -45,7 +45,7 @@ #define DRIVER_AUTHOR "Wolfgang Grandegger " #define DRIVER_DESC "Magic Control Technology USB-RS232 converter driver" -static int debug; +static bool debug; /* * Function prototypes diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 19d112f51b9..4554ee49e63 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -71,7 +71,7 @@ struct moschip_port { struct urb *write_urb_pool[NUM_URBS]; }; -static int debug; +static bool debug; static struct usb_serial_driver moschip7720_2port_driver; diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 55cfd6265b9..03b5e249e95 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -263,7 +263,7 @@ struct moschip_port 
{ }; -static int debug; +static bool debug; /* * mos7840_set_reg_sync diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c index 1f00f243c26..b28f1db0195 100644 --- a/drivers/usb/serial/navman.c +++ b/drivers/usb/serial/navman.c @@ -21,7 +21,7 @@ #include #include -static int debug; +static bool debug; static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0a99, 0x0001) }, /* Talon Technology device */ diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index 45a8c55881d..8b8d58a2ac1 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -23,7 +23,7 @@ #include #include -static int debug; +static bool debug; /* * Version Information diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c index 691f57a9d71..262ded9e076 100644 --- a/drivers/usb/serial/opticon.c +++ b/drivers/usb/serial/opticon.c @@ -32,7 +32,7 @@ * an examples of 1D barcode types are EAN, UPC, Code39, IATA etc.. */ #define DRIVER_DESC "Opticon USB barcode to serial driver (1D)" -static int debug; +static bool debug; static const struct usb_device_id id_table[] = { { USB_DEVICE(0x065a, 0x0009) }, diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c96b6b6509f..420d9857394 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1234,7 +1234,7 @@ static struct usb_serial_driver option_1port_device = { #endif }; -static int debug; +static bool debug; /* per port private data */ diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c index 2161d1c3c08..e287fd32682 100644 --- a/drivers/usb/serial/oti6858.c +++ b/drivers/usb/serial/oti6858.c @@ -74,7 +74,7 @@ static struct usb_driver oti6858_driver = { .no_dynamic_id = 1, }; -static int debug; +static bool debug; /* requests */ #define OTI6858_REQ_GET_STATUS (USB_DIR_IN | USB_TYPE_VENDOR | 0x00) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 329295615d0..3d8cda57ce7 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -36,7 +36,7 @@ */ #define DRIVER_DESC "Prolific PL2303 USB to serial adaptor driver" -static int debug; +static bool debug; #define PL2303_CLOSING_WAIT (30*HZ) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index aa9367f5b42..1d5deee3be5 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -22,7 +22,7 @@ #define DRIVER_AUTHOR "Qualcomm Inc" #define DRIVER_DESC "Qualcomm USB Serial driver" -static int debug; +static bool debug; static const struct usb_device_id id_table[] = { {USB_DEVICE(0x05c6, 0x9211)}, /* Acer Gobi QDL device */ diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c index a36e2313eed..d074b3740dc 100644 --- a/drivers/usb/serial/safe_serial.c +++ b/drivers/usb/serial/safe_serial.c @@ -81,9 +81,9 @@ #define CONFIG_USB_SERIAL_SAFE_PADDED 0 #endif -static int debug; -static int safe = 1; -static int padded = CONFIG_USB_SERIAL_SAFE_PADDED; +static bool debug; +static bool safe = 1; +static bool padded = CONFIG_USB_SERIAL_SAFE_PADDED; #define DRIVER_VERSION "v0.1" #define DRIVER_AUTHOR "sl@lineo.com, tbr@lineo.com, Johan Hovold " diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index f2485429172..fdae0a4407c 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -46,8 +46,8 @@ allocations > PAGE_SIZE and the number of packets in a page is an integer 512 is the largest possible packet on EHCI */ -static int debug; -static 
int nmea; +static bool debug; +static bool nmea; /* Used in interface blacklisting */ struct sierra_iface_info { diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 180ea6c7911..d7f5eee18f0 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -33,7 +33,7 @@ #define DRIVER_VERSION "v0.10" #define DRIVER_DESC "SPCP8x5 USB to serial adaptor driver" -static int debug; +static bool debug; #define SPCP8x5_007_VID 0x04FC #define SPCP8x5_007_PID 0x0201 diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c index 87362e48796..7697858d885 100644 --- a/drivers/usb/serial/ssu100.c +++ b/drivers/usb/serial/ssu100.c @@ -46,7 +46,7 @@ #define FULLPWRBIT 0x00000080 #define NEXT_BOARD_POWER_BIT 0x00000004 -static int debug; +static bool debug; /* Version Information */ #define DRIVER_VERSION "v0.1" diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c index c70cc012d03..50651cf4fc6 100644 --- a/drivers/usb/serial/symbolserial.c +++ b/drivers/usb/serial/symbolserial.c @@ -20,7 +20,7 @@ #include #include -static int debug; +static bool debug; static const struct usb_device_id id_table[] = { { USB_DEVICE(0x05e0, 0x0600) }, diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 4af21f46096..8468eb769a2 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -150,7 +150,7 @@ static int ti_download_firmware(struct ti_device *tdev); /* Data */ /* module parameters */ -static int debug; +static bool debug; static int closing_wait = TI_DEFAULT_CLOSING_WAIT; static ushort vendor_3410[TI_EXTRA_VID_PID_COUNT]; static unsigned int vendor_3410_count; diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index ce6c1a65a54..611b206591c 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -61,7 +61,7 @@ static struct usb_driver usb_serial_driver = { drivers depend on it. */ -static int debug; +static bool debug; /* initially all NULL */ static struct usb_serial *serial_table[SERIAL_TTY_MINORS]; static DEFINE_MUTEX(table_lock); diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c index d555ca9567b..c88657dd31c 100644 --- a/drivers/usb/serial/usb_wwan.c +++ b/drivers/usb/serial/usb_wwan.c @@ -37,7 +37,7 @@ #include #include "usb-wwan.h" -static int debug; +static bool debug; void usb_wwan_dtr_rts(struct usb_serial_port *port, int on) { diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index 1c11959a7d5..210e4b10dc1 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -52,7 +52,7 @@ static int palm_os_4_probe(struct usb_serial *serial, const struct usb_device_id *id); /* Parameters that may be passed into the module. 
*/ -static int debug; +static bool debug; static __u16 vendor; static __u16 product; diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 11af903cb09..7e0acf5c8e3 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -36,7 +36,7 @@ #include #include "whiteheat.h" /* WhiteHEAT specific commands */ -static int debug; +static bool debug; #ifndef CMSPAR #define CMSPAR 0 diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 44bdce4242a..622f12b62a4 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -301,9 +301,9 @@ static struct fb_ops atyfb_ops = { .fb_sync = atyfb_sync, }; -static int noaccel; +static bool noaccel; #ifdef CONFIG_MTRR -static int nomtrr; +static bool nomtrr; #endif static int vram; static int pll; diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c index 150684882ef..ce1506b75ad 100644 --- a/drivers/video/aty/radeon_base.c +++ b/drivers/video/aty/radeon_base.c @@ -263,19 +263,19 @@ static reg_val common_regs[] = { static char *mode_option; static char *monitor_layout; -static int noaccel = 0; +static bool noaccel = 0; static int default_dynclk = -2; -static int nomodeset = 0; -static int ignore_edid = 0; -static int mirror = 0; +static bool nomodeset = 0; +static bool ignore_edid = 0; +static bool mirror = 0; static int panel_yres = 0; -static int force_dfp = 0; -static int force_measure_pll = 0; +static bool force_dfp = 0; +static bool force_measure_pll = 0; #ifdef CONFIG_MTRR -static int nomtrr = 0; +static bool nomtrr = 0; #endif -static int force_sleep; -static int ignore_devlist; +static bool force_sleep; +static bool ignore_devlist; #ifdef CONFIG_PMAC_BACKLIGHT static int backlight = 1; #else diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c index 6df7c54db0a..6fb499e7678 100644 --- a/drivers/video/cirrusfb.c +++ b/drivers/video/cirrusfb.c @@ -350,7 +350,7 @@ struct cirrusfb_info { void (*unmap)(struct fb_info *info); }; -static int noaccel __devinitdata; +static bool noaccel __devinitdata; static char *mode_option __devinitdata = "640x480@60"; /****************************************************************************/ diff --git a/drivers/video/hgafb.c b/drivers/video/hgafb.c index 4394389caf6..c645f928265 100644 --- a/drivers/video/hgafb.c +++ b/drivers/video/hgafb.c @@ -133,7 +133,7 @@ static struct fb_fix_screeninfo hga_fix __devinitdata = { /* Don't assume that tty1 will be the initial current console. 
*/ static int release_io_port = 0; static int release_io_ports = 0; -static int nologo = 0; +static bool nologo = 0; /* ------------------------------------------------------------------------- * diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c index 5ba39999105..c94c91fd866 100644 --- a/drivers/video/intelfb/intelfbdrv.c +++ b/drivers/video/intelfb/intelfbdrv.c @@ -230,15 +230,15 @@ MODULE_DESCRIPTION("Framebuffer driver for Intel(R) " SUPPORTED_CHIPSETS MODULE_LICENSE("Dual BSD/GPL"); MODULE_DEVICE_TABLE(pci, intelfb_pci_table); -static int accel = 1; +static bool accel = 1; static int vram = 4; -static int hwcursor = 0; -static int mtrr = 1; -static int fixed = 0; -static int noinit = 0; -static int noregister = 0; -static int probeonly = 0; -static int idonly = 0; +static bool hwcursor = 0; +static bool mtrr = 1; +static bool fixed = 0; +static bool noinit = 0; +static bool noregister = 0; +static bool probeonly = 0; +static bool idonly = 0; static int bailearly = 0; static int voffset = 48; static char *mode = NULL; diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c index ea7a8ccc830..080c35b34bb 100644 --- a/drivers/video/logo/logo.c +++ b/drivers/video/logo/logo.c @@ -21,7 +21,7 @@ #include #endif -static int nologo; +static bool nologo; module_param(nologo, bool, 0); MODULE_PARM_DESC(nologo, "Disables startup logo"); diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c index feea7b1dc38..fb3f6739110 100644 --- a/drivers/video/neofb.c +++ b/drivers/video/neofb.c @@ -84,11 +84,11 @@ /* --------------------------------------------------------------------- */ -static int internal; -static int external; -static int libretto; -static int nostretch; -static int nopciburst; +static bool internal; +static bool external; +static bool libretto; +static bool nostretch; +static bool nopciburst; static char *mode_option __devinitdata = NULL; #ifdef MODULE diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c index 25d8e510319..b291bfaac80 100644 --- a/drivers/video/omap/omapfb_main.c +++ b/drivers/video/omap/omapfb_main.c @@ -47,9 +47,9 @@ static unsigned int def_rotate; static unsigned int def_mirror; #ifdef CONFIG_FB_OMAP_MANUAL_UPDATE -static int manual_update = 1; +static bool manual_update = 1; #else -static int manual_update; +static bool manual_update; #endif static struct platform_device *fbdev_pdev; diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c index 86ec12e16c7..da7b1857654 100644 --- a/drivers/video/omap2/dss/core.c +++ b/drivers/video/omap2/dss/core.c @@ -50,7 +50,7 @@ module_param_named(def_disp, def_disp_name, charp, 0); MODULE_PARM_DESC(def_disp, "default display name"); #ifdef DEBUG -unsigned int dss_debug; +bool dss_debug; module_param_named(debug, dss_debug, bool, 0644); #endif diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index 5abf8e7e745..46f37883e49 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -340,8 +340,8 @@ struct dsi_packet_sent_handler_data { static struct platform_device *dsi_pdev_map[MAX_NUM_DSI]; #ifdef DEBUG -static unsigned int dsi_perf; -module_param_named(dsi_perf, dsi_perf, bool, 0644); +static bool dsi_perf; +module_param(dsi_perf, bool, 0644); #endif static inline struct dsi_data *dsi_get_dsidrv_data(struct platform_device *dsidev) diff --git a/drivers/video/omap2/dss/dss.h b/drivers/video/omap2/dss/dss.h index 6308fc59fc9..57a52eecee9 100644 --- a/drivers/video/omap2/dss/dss.h 
+++ b/drivers/video/omap2/dss/dss.h @@ -28,7 +28,7 @@ #endif #ifdef DEBUG -extern unsigned int dss_debug; +extern bool dss_debug; #ifdef DSS_SUBSYS_NAME #define DSSDBG(format, ...) \ if (dss_debug) \ diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c index 70aa47de714..68ba1f80008 100644 --- a/drivers/video/omap2/omapfb/omapfb-main.c +++ b/drivers/video/omap2/omapfb/omapfb-main.c @@ -43,18 +43,18 @@ static char *def_mode; static char *def_vram; -static int def_vrfb; +static bool def_vrfb; static int def_rotate; -static int def_mirror; +static bool def_mirror; static bool auto_update; static unsigned int auto_update_freq; module_param(auto_update, bool, 0); module_param(auto_update_freq, uint, 0644); #ifdef DEBUG -unsigned int omapfb_debug; +bool omapfb_debug; module_param_named(debug, omapfb_debug, bool, 0644); -static unsigned int omapfb_test_pattern; +static bool omapfb_test_pattern; module_param_named(test, omapfb_test_pattern, bool, 0644); #endif diff --git a/drivers/video/omap2/omapfb/omapfb.h b/drivers/video/omap2/omapfb/omapfb.h index fdf0edeccf4..e12d384ea52 100644 --- a/drivers/video/omap2/omapfb/omapfb.h +++ b/drivers/video/omap2/omapfb/omapfb.h @@ -32,7 +32,7 @@ #include
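All of the hunks above apply one rule: a variable registered with module_param(..., bool, ...) or module_param_named(..., bool, ...) must itself be declared as C bool rather than int or unsigned int. The following is a minimal, self-contained sketch of the resulting pattern; the module and parameter names (example_params, use_dma, debug) are illustrative only and are not taken from any of the drivers touched by this patch.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

/* Storage type matches the "bool" handler named in module_param(). */
static bool use_dma = true;	/* illustrative parameter, default on */
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "enable DMA (default: true)");

static bool debug;		/* defaults to false */
module_param(debug, bool, 0444);
MODULE_PARM_DESC(debug, "enable verbose debugging messages");

static int __init example_params_init(void)
{
	pr_info("example_params: use_dma=%d debug=%d\n", use_dma, debug);
	return 0;
}

static void __exit example_params_exit(void)
{
}

module_init(example_params_init);
module_exit(example_params_exit);

MODULE_DESCRIPTION("illustrative bool module parameters");
MODULE_LICENSE("GPL");

As with the drivers above, the values can still be set at load time ("modprobe example_params use_dma=N debug=1"); the bool handler accepts y/n/1/0, so existing command lines keep working after the int-to-bool conversion.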