diff options
| author | Sage Weil <sage@newdream.net> | 2011-05-24 11:46:31 -0700 | 
|---|---|---|
| committer | Sage Weil <sage@newdream.net> | 2011-05-24 11:52:12 -0700 | 
| commit | db3540522e955c1ebb391f4f5324dff4f20ecd09 (patch) | |
| tree | 8c25b07caa8614345c71f09e8872e60b68af4c31 | |
| parent | cd634fb6eec72ef8e6dd677546b8d0ffdd2501eb (diff) | |
ceph: fix cap flush race reentrancy
In e9964c10 we change cap flushing to do a delicate dance because some
inodes on the cap_dirty list could be in a migrating state (got EXPORT but
not IMPORT) in which we couldn't actually flush and move from
dirty->flushing, breaking the while (!empty) { process first } loop
structure.  It worked for a single sync thread, but was not reentrant and
triggered infinite loops when multiple syncers came along.
Instead, move inodes with dirty to a separate cap_dirty_migrating list
when in the limbo export-but-no-import state, allowing us to go back to
the simple loop structure (which was reentrant).  This is cleaner and more
robust.
Audited the cap_dirty users and this looks fine:
list_empty(&ci->i_dirty_item) is still a reliable indicator of whether we
have dirty caps (which list we're on is irrelevant) and list_del_init()
calls still do the right thing.
Signed-off-by: Sage Weil <sage@newdream.net>
| -rw-r--r-- | fs/ceph/caps.c | 58 | ||||
| -rw-r--r-- | fs/ceph/mds_client.c | 1 | ||||
| -rw-r--r-- | fs/ceph/mds_client.h | 1 | 
3 files changed, 31 insertions, 29 deletions
| diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 591202bc966..1f72b00447c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2635,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,  			      struct ceph_mds_session *session,  			      int *open_target_sessions)  { +	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;  	struct ceph_inode_info *ci = ceph_inode(inode);  	int mds = session->s_mds;  	unsigned mseq = le32_to_cpu(ex->migrate_seq); @@ -2671,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,  			 * export targets, so that we get the matching IMPORT  			 */  			*open_target_sessions = 1; + +			/* +			 * we can't flush dirty caps that we've seen the +			 * EXPORT but no IMPORT for +			 */ +			spin_lock(&mdsc->cap_dirty_lock); +			if (!list_empty(&ci->i_dirty_item)) { +				dout(" moving %p to cap_dirty_migrating\n", +				     inode); +				list_move(&ci->i_dirty_item, +					  &mdsc->cap_dirty_migrating); +			} +			spin_unlock(&mdsc->cap_dirty_lock);  		}  		__ceph_remove_cap(cap);  	} @@ -2708,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,  		ci->i_cap_exporting_issued = 0;  		ci->i_cap_exporting_mseq = 0;  		ci->i_cap_exporting_mds = -1; + +		spin_lock(&mdsc->cap_dirty_lock); +		if (!list_empty(&ci->i_dirty_item)) { +			dout(" moving %p back to cap_dirty\n", inode); +			list_move(&ci->i_dirty_item, &mdsc->cap_dirty); +		} +		spin_unlock(&mdsc->cap_dirty_lock);  	} else {  		dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",  		     inode, ci, mds, mseq); @@ -2911,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)   */  void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)  { -	struct ceph_inode_info *ci, *nci = NULL; -	struct inode *inode, *ninode = NULL; -	struct list_head *p, *n; +	struct ceph_inode_info *ci; +	struct inode *inode;  	dout("flush_dirty_caps\n");  	spin_lock(&mdsc->cap_dirty_lock); -	list_for_each_safe(p, n, &mdsc->cap_dirty) { -		if (nci) { -			ci = nci; -			inode = ninode; -			ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; -			dout("flush_dirty_caps inode %p (was next inode)\n", -			     inode); -		} else { -			ci = list_entry(p, struct ceph_inode_info, -					i_dirty_item); -			inode = igrab(&ci->vfs_inode); -			BUG_ON(!inode); -			dout("flush_dirty_caps inode %p\n", inode); -		} -		if (n != &mdsc->cap_dirty) { -			nci = list_entry(n, struct ceph_inode_info, -					 i_dirty_item); -			ninode = igrab(&nci->vfs_inode); -			BUG_ON(!ninode); -			nci->i_ceph_flags |= CEPH_I_NOFLUSH; -			dout("flush_dirty_caps next inode %p, noflush\n", -			     ninode); -		} else { -			nci = NULL; -			ninode = NULL; -		} +	while (!list_empty(&mdsc->cap_dirty)) { +		ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info, +				      i_dirty_item); +		inode = igrab(&ci->vfs_inode); +		dout("flush_dirty_caps %p\n", inode);  		spin_unlock(&mdsc->cap_dirty_lock);  		if (inode) {  			ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, @@ -2952,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)  		spin_lock(&mdsc->cap_dirty_lock);  	}  	spin_unlock(&mdsc->cap_dirty_lock); +	dout("flush_dirty_caps done\n");  }  /* diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c12d2e9a0ec..79743d146be 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3004,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)  	spin_lock_init(&mdsc->snap_flush_lock);  	mdsc->cap_flush_seq = 0;  	INIT_LIST_HEAD(&mdsc->cap_dirty); +	INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);  	mdsc->num_cap_flushing = 0;  	spin_lock_init(&mdsc->cap_dirty_lock);  	init_waitqueue_head(&mdsc->cap_flushing_wq); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 4e3a9cc0bba..7d8a0d662d5 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -278,6 +278,7 @@ struct ceph_mds_client {  	u64               cap_flush_seq;  	struct list_head  cap_dirty;        /* inodes with dirty caps */ +	struct list_head  cap_dirty_migrating; /* ...that are migration... */  	int               num_cap_flushing; /* # caps we are flushing */  	spinlock_t        cap_dirty_lock;   /* protects above items */  	wait_queue_head_t cap_flushing_wq; | 
