From a374c57b0764432a80303abee3d1afd1939b5a0a Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Mon, 13 Apr 2009 14:40:18 -0700 Subject: sgi-xpc: prevent false heartbeat failures The heartbeat timeout functionality in sgi-xpc is currently not trained to the connection time. If a connection is made and the code is in the last polling window prior to doing a timeout, the next polling window will see the heartbeat as unchanged and initiate a no-heartbeat disconnect. Signed-off-by: Robin Holt Signed-off-by: Dean Nelson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/sgi-xp/xpc.h | 100 ++++++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 62 deletions(-) (limited to 'drivers/misc/sgi-xp/xpc.h') diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h index 114444cfd496..da32bbe8caaf 100644 --- a/drivers/misc/sgi-xp/xpc.h +++ b/drivers/misc/sgi-xp/xpc.h @@ -90,18 +90,21 @@ struct xpc_rsvd_page { short max_npartitions; /* value of XPC_MAX_PARTITIONS */ u8 version; u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */ + unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ union { - unsigned long vars_pa; /* phys address of struct xpc_vars */ - unsigned long activate_gru_mq_desc_gpa; /* phys addr of */ - /* activate mq's */ - /* gru mq descriptor */ + struct { + unsigned long vars_pa; /* phys addr */ + } sn2; + struct { + unsigned long heartbeat_gpa; /* phys addr */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr */ + } uv; } sn; - unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ - u64 pad2[10]; /* align to last u64 in 2nd 64-byte cacheline */ + u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */ u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */ }; -#define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */ +#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */ /* * Define the structures by which XPC variables can be exported to other @@ -182,6 +185,17 @@ struct xpc_vars_part_sn2 { (XPC_RP_MACH_NASIDS(_rp) + \ xpc_nasid_mask_nlongs)) + +/* + * The following structure describes the partition's heartbeat info which + * will be periodically read by other partitions to determine whether this + * XPC is still 'alive'. + */ +struct xpc_heartbeat_uv { + unsigned long value; + unsigned long offline; /* if 0, heartbeat should be changing */ +}; + /* * Info pertinent to a GRU message queue using a watch list for irq generation. */ @@ -198,7 +212,7 @@ struct xpc_gru_mq_uv { /* * The activate_mq is used to send/receive GRU messages that affect XPC's - * heartbeat, partition active state, and channel state. This is UV only. + * partition active state and channel state. This is uv only. */ struct xpc_activate_mq_msghdr_uv { unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ @@ -210,33 +224,26 @@ struct xpc_activate_mq_msghdr_uv { /* activate_mq defined message types */ #define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0 -#define XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV 1 -#define XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV 2 -#define XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV 3 -#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 4 -#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 5 +#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1 +#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 6 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 7 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 8 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 9 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6 -#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 10 -#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 11 +#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 7 +#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 8 struct xpc_activate_mq_msg_uv { struct xpc_activate_mq_msghdr_uv hdr; }; -struct xpc_activate_mq_msg_heartbeat_req_uv { - struct xpc_activate_mq_msghdr_uv hdr; - u64 heartbeat; -}; - struct xpc_activate_mq_msg_activate_req_uv { struct xpc_activate_mq_msghdr_uv hdr; unsigned long rp_gpa; + unsigned long heartbeat_gpa; unsigned long activate_gru_mq_desc_gpa; }; @@ -687,6 +694,9 @@ struct xpc_partition_sn2 { }; struct xpc_partition_uv { + unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */ + struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */ + /* partition's heartbeat */ unsigned long activate_gru_mq_desc_gpa; /* phys addr of parititon's */ /* activate mq's gru mq */ /* descriptor */ @@ -698,14 +708,12 @@ struct xpc_partition_uv { u8 remote_act_state; /* remote partition's act_state */ u8 act_state_req; /* act_state request from remote partition */ enum xp_retval reason; /* reason for deactivate act_state request */ - u64 heartbeat; /* incremented by remote partition */ }; /* struct xpc_partition_uv flags */ -#define XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001 +#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001 #define XPC_P_ENGAGED_UV 0x00000002 -#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000004 /* struct xpc_partition_uv act_state change requests */ @@ -807,7 +815,6 @@ extern int xpc_disengage_timedout; extern int xpc_activate_IRQ_rcvd; extern spinlock_t xpc_activate_IRQ_rcvd_lock; extern wait_queue_head_t xpc_activate_IRQ_wq; -extern void *xpc_heartbeating_to_mask; extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **); extern void xpc_activate_partition(struct xpc_partition *); extern void xpc_activate_kthreads(struct xpc_channel *, int); @@ -825,6 +832,9 @@ extern void (*xpc_increment_heartbeat) (void); extern void (*xpc_offline_heartbeat) (void); extern void (*xpc_online_heartbeat) (void); extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *); +extern void (*xpc_allow_hb) (short); +extern void (*xpc_disallow_hb) (short); +extern void (*xpc_disallow_all_hbs) (void); extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *); extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *); extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *); @@ -909,40 +919,6 @@ extern void xpc_disconnect_channel(const int, struct xpc_channel *, extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval); extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval); -static inline int -xpc_hb_allowed(short partid, void *heartbeating_to_mask) -{ - return test_bit(partid, heartbeating_to_mask); -} - -static inline int -xpc_any_hbs_allowed(void) -{ - DBUG_ON(xpc_heartbeating_to_mask == NULL); - return !bitmap_empty(xpc_heartbeating_to_mask, xp_max_npartitions); -} - -static inline void -xpc_allow_hb(short partid) -{ - DBUG_ON(xpc_heartbeating_to_mask == NULL); - set_bit(partid, xpc_heartbeating_to_mask); -} - -static inline void -xpc_disallow_hb(short partid) -{ - DBUG_ON(xpc_heartbeating_to_mask == NULL); - clear_bit(partid, xpc_heartbeating_to_mask); -} - -static inline void -xpc_disallow_all_hbs(void) -{ - DBUG_ON(xpc_heartbeating_to_mask == NULL); - bitmap_zero(xpc_heartbeating_to_mask, xp_max_npartitions); -} - static inline void xpc_wakeup_channel_mgr(struct xpc_partition *part) { -- cgit v1.2.3