summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/agent.c19
-rw-r--r--drivers/infiniband/core/cm.c42
-rw-r--r--drivers/infiniband/core/fmr_pool.c6
-rw-r--r--drivers/infiniband/core/mad.c195
-rw-r--r--drivers/infiniband/core/mad_priv.h16
-rw-r--r--drivers/infiniband/core/mad_rmpp.c148
-rw-r--r--drivers/infiniband/core/smi.h9
-rw-r--r--drivers/infiniband/core/sysfs.c36
-rw-r--r--drivers/infiniband/core/user_mad.c225
-rw-r--r--drivers/infiniband/core/uverbs.h5
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c202
-rw-r--r--drivers/infiniband/core/uverbs_main.c6
-rw-r--r--drivers/infiniband/core/verbs.c259
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c33
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c323
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.h14
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c161
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h33
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c17
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c23
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mcg.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c29
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h10
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c42
-rw-r--r--drivers/infiniband/hw/mthca/mthca_pd.c3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.c10
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c170
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h53
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c505
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c49
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h27
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c54
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c36
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c25
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c8
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c230
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h7
39 files changed, 2127 insertions, 918 deletions
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 34b724afd28d..ecd1a3057c61 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -78,25 +78,6 @@ ib_get_agent_port(struct ib_device *device, int port_num)
return entry;
}
-int smi_check_local_dr_smp(struct ib_smp *smp,
- struct ib_device *device,
- int port_num)
-{
- struct ib_agent_port_private *port_priv;
-
- if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
- return 1;
-
- port_priv = ib_get_agent_port(device, port_num);
- if (!port_priv) {
- printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d "
- "not open\n", device->name, port_num);
- return 1;
- }
-
- return smi_check_local_smp(port_priv->agent[0], smp);
-}
-
int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
struct ib_wc *wc, struct ib_device *device,
int port_num, int qpn)
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 2514de3480d8..7cfedb8d9bcd 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -121,7 +121,7 @@ struct cm_id_private {
struct rb_node service_node;
struct rb_node sidr_id_node;
- spinlock_t lock;
+ spinlock_t lock; /* Do not acquire inside cm.lock */
wait_queue_head_t wait;
atomic_t refcount;
@@ -1547,40 +1547,46 @@ static int cm_rep_handler(struct cm_work *work)
return -EINVAL;
}
+ cm_format_rep_event(work);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ switch (cm_id_priv->id.state) {
+ case IB_CM_REQ_SENT:
+ case IB_CM_MRA_REQ_RCVD:
+ break;
+ default:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ ret = -EINVAL;
+ goto error;
+ }
+
cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
- spin_lock_irqsave(&cm.lock, flags);
+ spin_lock(&cm.lock);
/* Check for duplicate REP. */
if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
- spin_unlock_irqrestore(&cm.lock, flags);
+ spin_unlock(&cm.lock);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
ret = -EINVAL;
goto error;
}
/* Check for a stale connection. */
if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
- spin_unlock_irqrestore(&cm.lock, flags);
+ rb_erase(&cm_id_priv->timewait_info->remote_id_node,
+ &cm.remote_id_table);
+ cm_id_priv->timewait_info->inserted_remote_id = 0;
+ spin_unlock(&cm.lock);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
goto error;
}
- spin_unlock_irqrestore(&cm.lock, flags);
-
- cm_format_rep_event(work);
+ spin_unlock(&cm.lock);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch (cm_id_priv->id.state) {
- case IB_CM_REQ_SENT:
- case IB_CM_MRA_REQ_RCVD:
- break;
- default:
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = -EINVAL;
- goto error;
- }
cm_id_priv->id.state = IB_CM_REP_RCVD;
cm_id_priv->id.remote_id = rep_msg->local_comm_id;
cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
@@ -1603,7 +1609,7 @@ static int cm_rep_handler(struct cm_work *work)
cm_deref_id(cm_id_priv);
return 0;
-error: cm_cleanup_timewait(cm_id_priv->timewait_info);
+error:
cm_deref_id(cm_id_priv);
return ret;
}
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index d34a6f1c4f4c..838bf54458d2 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -278,9 +278,9 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
{
struct ib_pool_fmr *fmr;
struct ib_fmr_attr attr = {
- .max_pages = params->max_pages_per_fmr,
- .max_maps = IB_FMR_MAX_REMAPS,
- .page_size = PAGE_SHIFT
+ .max_pages = params->max_pages_per_fmr,
+ .max_maps = IB_FMR_MAX_REMAPS,
+ .page_shift = params->page_shift
};
for (i = 0; i < params->pool_size; ++i) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index c82f47a66e48..f7854b65fd55 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -31,7 +31,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: mad.c 2817 2005-07-07 11:29:26Z halr $
+ * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
*/
#include <linux/dma-mapping.h>
@@ -679,8 +679,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
goto out;
}
/* Check to post send on QP or process locally */
- ret = smi_check_local_dr_smp(smp, device, port_num);
- if (!ret || !device->process_mad)
+ ret = smi_check_local_smp(smp, device);
+ if (!ret)
goto out;
local = kmalloc(sizeof *local, GFP_ATOMIC);
@@ -765,18 +765,67 @@ out:
return ret;
}
-static int get_buf_length(int hdr_len, int data_len)
+static int get_pad_size(int hdr_len, int data_len)
{
int seg_size, pad;
seg_size = sizeof(struct ib_mad) - hdr_len;
if (data_len && seg_size) {
pad = seg_size - data_len % seg_size;
- if (pad == seg_size)
- pad = 0;
+ return pad == seg_size ? 0 : pad;
} else
- pad = seg_size;
- return hdr_len + data_len + pad;
+ return seg_size;
+}
+
+static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_rmpp_segment *s, *t;
+
+ list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
+ list_del(&s->list);
+ kfree(s);
+ }
+}
+
+static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
+ gfp_t gfp_mask)
+{
+ struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
+ struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
+ struct ib_rmpp_segment *seg = NULL;
+ int left, seg_size, pad;
+
+ send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len;
+ seg_size = send_buf->seg_size;
+ pad = send_wr->pad;
+
+ /* Allocate data segments. */
+ for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
+ seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
+ if (!seg) {
+ printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
+ "alloc failed for len %zd, gfp %#x\n",
+ sizeof (*seg) + seg_size, gfp_mask);
+ free_send_rmpp_list(send_wr);
+ return -ENOMEM;
+ }
+ seg->num = ++send_buf->seg_count;
+ list_add_tail(&seg->list, &send_wr->rmpp_list);
+ }
+
+ /* Zero any padding */
+ if (pad)
+ memset(seg->data + seg_size - pad, 0, pad);
+
+ rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
+ agent.rmpp_version;
+ rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
+ ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+
+ send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
+ struct ib_rmpp_segment, list);
+ send_wr->last_ack_seg = send_wr->cur_seg;
+ return 0;
}
struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
@@ -787,32 +836,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
- int buf_size;
+ int pad, message_size, ret, size;
void *buf;
mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
agent);
- buf_size = get_buf_length(hdr_len, data_len);
+ pad = get_pad_size(hdr_len, data_len);
+ message_size = hdr_len + data_len + pad;
if ((!mad_agent->rmpp_version &&
- (rmpp_active || buf_size > sizeof(struct ib_mad))) ||
- (!rmpp_active && buf_size > sizeof(struct ib_mad)))
+ (rmpp_active || message_size > sizeof(struct ib_mad))) ||
+ (!rmpp_active && message_size > sizeof(struct ib_mad)))
return ERR_PTR(-EINVAL);
- buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
+ size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
+ buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
if (!buf)
return ERR_PTR(-ENOMEM);
- mad_send_wr = buf + buf_size;
+ mad_send_wr = buf + size;
+ INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
mad_send_wr->send_buf.mad = buf;
+ mad_send_wr->send_buf.hdr_len = hdr_len;
+ mad_send_wr->send_buf.data_len = data_len;
+ mad_send_wr->pad = pad;
mad_send_wr->mad_agent_priv = mad_agent_priv;
- mad_send_wr->sg_list[0].length = buf_size;
+ mad_send_wr->sg_list[0].length = hdr_len;
mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
+ mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
+ mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;
mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
- mad_send_wr->send_wr.num_sge = 1;
+ mad_send_wr->send_wr.num_sge = 2;
mad_send_wr->send_wr.opcode = IB_WR_SEND;
mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
@@ -820,13 +877,11 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
if (rmpp_active) {
- struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
- rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
- IB_MGMT_RMPP_HDR + data_len);
- rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
- rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
- ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
- IB_MGMT_RMPP_FLAG_ACTIVE);
+ ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask);
+ if (ret) {
+ kfree(buf);
+ return ERR_PTR(ret);
+ }
}
mad_send_wr->send_buf.mad_agent = mad_agent;
@@ -835,14 +890,50 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
}
EXPORT_SYMBOL(ib_create_send_mad);
+void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct list_head *list;
+
+ mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+ send_buf);
+ list = &mad_send_wr->cur_seg->list;
+
+ if (mad_send_wr->cur_seg->num < seg_num) {
+ list_for_each_entry(mad_send_wr->cur_seg, list, list)
+ if (mad_send_wr->cur_seg->num == seg_num)
+ break;
+ } else if (mad_send_wr->cur_seg->num > seg_num) {
+ list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
+ if (mad_send_wr->cur_seg->num == seg_num)
+ break;
+ }
+ return mad_send_wr->cur_seg->data;
+}
+EXPORT_SYMBOL(ib_get_rmpp_segment);
+
+static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ if (mad_send_wr->send_buf.seg_count)
+ return ib_get_rmpp_segment(&mad_send_wr->send_buf,
+ mad_send_wr->seg_num);
+ else
+ return mad_send_wr->send_buf.mad +
+ mad_send_wr->send_buf.hdr_len;
+}
+
void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
{
struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_wr_private *mad_send_wr;
mad_agent_priv = container_of(send_buf->mad_agent,
struct ib_mad_agent_private, agent);
- kfree(send_buf->mad);
+ mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+ send_buf);
+ free_send_rmpp_list(mad_send_wr);
+ kfree(send_buf->mad);
if (atomic_dec_and_test(&mad_agent_priv->refcount))
wake_up(&mad_agent_priv->wait);
}
@@ -865,10 +956,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_agent = mad_send_wr->send_buf.mad_agent;
sge = mad_send_wr->sg_list;
- sge->addr = dma_map_single(mad_agent->device->dma_device,
- mad_send_wr->send_buf.mad, sge->length,
- DMA_TO_DEVICE);
- pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
+ sge[0].addr = dma_map_single(mad_agent->device->dma_device,
+ mad_send_wr->send_buf.mad,
+ sge[0].length,
+ DMA_TO_DEVICE);
+ pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr);
+
+ sge[1].addr = dma_map_single(mad_agent->device->dma_device,
+ ib_get_payload(mad_send_wr),
+ sge[1].length,
+ DMA_TO_DEVICE);
+ pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr);
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
@@ -885,11 +983,14 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
list_add_tail(&mad_send_wr->mad_list.list, list);
}
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
- if (ret)
+ if (ret) {
dma_unmap_single(mad_agent->device->dma_device,
- pci_unmap_addr(mad_send_wr, mapping),
- sge->length, DMA_TO_DEVICE);
-
+ pci_unmap_addr(mad_send_wr, header_mapping),
+ sge[0].length, DMA_TO_DEVICE);
+ dma_unmap_single(mad_agent->device->dma_device,
+ pci_unmap_addr(mad_send_wr, payload_mapping),
+ sge[1].length, DMA_TO_DEVICE);
+ }
return ret;
}
@@ -1661,9 +1762,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
port_priv->device->node_type,
port_priv->port_num))
goto out;
- if (!smi_check_local_dr_smp(&recv->mad.smp,
- port_priv->device,
- port_priv->port_num))
+ if (!smi_check_local_smp(&recv->mad.smp, port_priv->device))
goto out;
}
@@ -1862,8 +1961,11 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
retry:
dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
- pci_unmap_addr(mad_send_wr, mapping),
+ pci_unmap_addr(mad_send_wr, header_mapping),
mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
+ dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
+ pci_unmap_addr(mad_send_wr, payload_mapping),
+ mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
queued_send_wr = NULL;
spin_lock_irqsave(&send_queue->lock, flags);
list_del(&mad_list->list);
@@ -2262,8 +2364,12 @@ static void timeout_sends(void *data)
static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
{
struct ib_mad_port_private *port_priv = cq->cq_context;
+ unsigned long flags;
- queue_work(port_priv->wq, &port_priv->work);
+ spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+ if (!list_empty(&port_priv->port_list))
+ queue_work(port_priv->wq, &port_priv->work);
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
}
/*
@@ -2575,18 +2681,23 @@ static int ib_mad_port_open(struct ib_device *device,
}
INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv);
+ spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+ list_add_tail(&port_priv->port_list, &ib_mad_port_list);
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
ret = ib_mad_port_start(port_priv);
if (ret) {
printk(KERN_ERR PFX "Couldn't start port\n");
goto error9;
}
- spin_lock_irqsave(&ib_mad_port_list_lock, flags);
- list_add_tail(&port_priv->port_list, &ib_mad_port_list);
- spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
return 0;
error9:
+ spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+ list_del_init(&port_priv->port_list);
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
destroy_workqueue(port_priv->wq);
error8:
destroy_mad_qp(&port_priv->qp_info[1]);
@@ -2623,11 +2734,9 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
printk(KERN_ERR PFX "Port %d not found\n", port_num);
return -ENODEV;
}
- list_del(&port_priv->port_list);
+ list_del_init(&port_priv->port_list);
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
- /* Stop processing completions. */
- flush_workqueue(port_priv->wq);
destroy_workqueue(port_priv->wq);
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 570f78682af3..a7125d4b5ccf 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -31,7 +31,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: mad_priv.h 2730 2005-06-28 16:43:03Z sean.hefty $
+ * $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $
*/
#ifndef __IB_MAD_PRIV_H__
@@ -85,6 +85,12 @@ struct ib_mad_private {
} mad;
} __attribute__ ((packed));
+struct ib_rmpp_segment {
+ struct list_head list;
+ u32 num;
+ u8 data[0];
+};
+
struct ib_mad_agent_private {
struct list_head agent_list;
struct ib_mad_agent agent;
@@ -119,7 +125,8 @@ struct ib_mad_send_wr_private {
struct list_head agent_list;
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_buf send_buf;
- DECLARE_PCI_UNMAP_ADDR(mapping)
+ DECLARE_PCI_UNMAP_ADDR(header_mapping)
+ DECLARE_PCI_UNMAP_ADDR(payload_mapping)
struct ib_send_wr send_wr;
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
__be64 tid;
@@ -130,11 +137,12 @@ struct ib_mad_send_wr_private {
enum ib_wc_status status;
/* RMPP control */
+ struct list_head rmpp_list;
+ struct ib_rmpp_segment *last_ack_seg;
+ struct ib_rmpp_segment *cur_seg;
int last_ack;
int seg_num;
int newwin;
- int total_seg;
- int data_offset;
int pad;
};
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 3249e1d8c07b..bacfdd5bddad 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -111,14 +111,14 @@ static int data_offset(u8 mgmt_class)
return IB_MGMT_RMPP_HDR;
}
-static void format_ack(struct ib_rmpp_mad *ack,
+static void format_ack(struct ib_mad_send_buf *msg,
struct ib_rmpp_mad *data,
struct mad_rmpp_recv *rmpp_recv)
{
+ struct ib_rmpp_mad *ack = msg->mad;
unsigned long flags;
- memcpy(&ack->mad_hdr, &data->mad_hdr,
- data_offset(data->mad_hdr.mgmt_class));
+ memcpy(ack, &data->mad_hdr, msg->hdr_len);
ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK;
@@ -135,16 +135,16 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
struct ib_mad_recv_wc *recv_wc)
{
struct ib_mad_send_buf *msg;
- int ret;
+ int ret, hdr_len;
+ hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
- recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR,
- IB_MGMT_RMPP_DATA, GFP_KERNEL);
+ recv_wc->wc->pkey_index, 1, hdr_len,
+ 0, GFP_KERNEL);
if (!msg)
return;
- format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad,
- rmpp_recv);
+ format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
msg->ah = rmpp_recv->ah;
ret = ib_post_send_mad(msg, NULL);
if (ret)
@@ -156,16 +156,17 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
{
struct ib_mad_send_buf *msg;
struct ib_ah *ah;
+ int hdr_len;
ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
recv_wc->recv_buf.grh, agent->port_num);
if (IS_ERR(ah))
return (void *) ah;
+ hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
recv_wc->wc->pkey_index, 1,
- IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA,
- GFP_KERNEL);
+ hdr_len, 0, GFP_KERNEL);
if (IS_ERR(msg))
ib_destroy_ah(ah);
else
@@ -195,8 +196,7 @@ static void nack_recv(struct ib_mad_agent_private *agent,
return;
rmpp_mad = msg->mad;
- memcpy(rmpp_mad, recv_wc->recv_buf.mad,
- data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class));
+ memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION;
@@ -433,44 +433,6 @@ static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
return rmpp_wc;
}
-void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf)
-{
- struct ib_mad_recv_buf *seg_buf;
- struct ib_rmpp_mad *rmpp_mad;
- void *data;
- int size, len, offset;
- u8 flags;
-
- len = mad_recv_wc->mad_len;
- if (len <= sizeof(struct ib_mad)) {
- memcpy(buf, mad_recv_wc->recv_buf.mad, len);
- return;
- }
-
- offset = data_offset(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
-
- list_for_each_entry(seg_buf, &mad_recv_wc->rmpp_list, list) {
- rmpp_mad = (struct ib_rmpp_mad *)seg_buf->mad;
- flags = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr);
-
- if (flags & IB_MGMT_RMPP_FLAG_FIRST) {
- data = rmpp_mad;
- size = sizeof(*rmpp_mad);
- } else {
- data = (void *) rmpp_mad + offset;
- if (flags & IB_MGMT_RMPP_FLAG_LAST)
- size = len;
- else
- size = sizeof(*rmpp_mad) - offset;
- }
-
- memcpy(buf, data, size);
- len -= size;
- buf += size;
- }
-}
-EXPORT_SYMBOL(ib_coalesce_recv_mad);
-
static struct ib_mad_recv_wc *
continue_rmpp(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
@@ -570,50 +532,33 @@ start_rmpp(struct ib_mad_agent_private *agent,
return mad_recv_wc;
}
-static inline u64 get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr)
-{
- return mad_send_wr->sg_list[0].addr + mad_send_wr->data_offset +
- (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset) *
- (mad_send_wr->seg_num - 1);
-}
-
static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_mad *rmpp_mad;
int timeout;
- u32 paylen;
+ u32 paylen = 0;
rmpp_mad = mad_send_wr->send_buf.mad;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
- rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num);
+ rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num);
if (mad_send_wr->seg_num == 1) {
rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST;
- paylen = mad_send_wr->total_seg * IB_MGMT_RMPP_DATA -
+ paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA -
mad_send_wr->pad;
- rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
- mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad);
- } else {
- mad_send_wr->send_wr.num_sge = 2;
- mad_send_wr->sg_list[0].length = mad_send_wr->data_offset;
- mad_send_wr->sg_list[1].addr = get_seg_addr(mad_send_wr);
- mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) -
- mad_send_wr->data_offset;
- mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey;
- rmpp_mad->rmpp_hdr.paylen_newwin = 0;
}
- if (mad_send_wr->seg_num == mad_send_wr->total_seg) {
+ if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) {
rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST;
paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad;
- rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
}
+ rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
/* 2 seconds for an ACK until we can find the packet lifetime */
timeout = mad_send_wr->send_buf.timeout_ms;
if (!timeout || timeout > 2000)
mad_send_wr->timeout = msecs_to_jiffies(2000);
- mad_send_wr->seg_num++;
+
return ib_send_mad(mad_send_wr);
}
@@ -629,7 +574,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid,
if (!mad_send_wr)
goto out; /* Unmatched send */
- if ((mad_send_wr->last_ack == mad_send_wr->total_seg) ||
+ if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
(!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
goto out; /* Send is already done */
@@ -645,6 +590,18 @@ out:
spin_unlock_irqrestore(&agent->lock, flags);
}
+static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
+ int seg_num)
+{
+ struct list_head *list;
+
+ wr->last_ack = seg_num;
+ list = &wr->last_ack_seg->list;
+ list_for_each_entry(wr->last_ack_seg, list, list)
+ if (wr->last_ack_seg->num == seg_num)
+ break;
+}
+
static void process_rmpp_ack(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
@@ -675,11 +632,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
if (!mad_send_wr)
goto out; /* Unmatched ACK */
- if ((mad_send_wr->last_ack == mad_send_wr->total_seg) ||
+ if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
(!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
goto out; /* Send is already done */
- if (seg_num > mad_send_wr->total_seg || seg_num > mad_send_wr->newwin) {
+ if (seg_num > mad_send_wr->send_buf.seg_count ||
+ seg_num > mad_send_wr->newwin) {
spin_unlock_irqrestore(&agent->lock, flags);
abort_send(agent, rmpp_mad->mad_hdr.tid,
IB_MGMT_RMPP_STATUS_S2B);
@@ -691,11 +649,11 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
goto out; /* Old ACK */
if (seg_num > mad_send_wr->last_ack) {
- mad_send_wr->last_ack = seg_num;
+ adjust_last_ack(mad_send_wr, seg_num);
mad_send_wr->retries = mad_send_wr->send_buf.retries;
}
mad_send_wr->newwin = newwin;
- if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
+ if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
/* If no response is expected, the ACK completes the send */
if (!mad_send_wr->send_buf.timeout_ms) {
struct ib_mad_send_wc wc;
@@ -714,7 +672,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
mad_send_wr->send_buf.timeout_ms);
} else if (mad_send_wr->refcount == 1 &&
mad_send_wr->seg_num < mad_send_wr->newwin &&
- mad_send_wr->seg_num <= mad_send_wr->total_seg) {
+ mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
/* Send failure will just result in a timeout/retry */
ret = send_next_seg(mad_send_wr);
if (ret)
@@ -838,31 +796,19 @@ out:
int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_mad *rmpp_mad;
- int i, total_len, ret;
+ int ret;
rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED;
- if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
+ if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
+ mad_send_wr->seg_num = 1;
return IB_RMPP_RESULT_INTERNAL;
+ }
- if (mad_send_wr->send_wr.num_sge > 1)
- return -EINVAL; /* TODO: support num_sge > 1 */
-
- mad_send_wr->seg_num = 1;
mad_send_wr->newwin = 1;
- mad_send_wr->data_offset = data_offset(rmpp_mad->mad_hdr.mgmt_class);
-
- total_len = 0;
- for (i = 0; i < mad_send_wr->send_wr.num_sge; i++)
- total_len += mad_send_wr->send_wr.sg_list[i].length;
-
- mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
- (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
- mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR -
- be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
/* We need to wait for the final ACK even if there isn't a response */
mad_send_wr->refcount += (mad_send_wr->timeout == 0);
@@ -893,14 +839,14 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
if (!mad_send_wr->timeout)
return IB_RMPP_RESULT_PROCESSED; /* Response received */
- if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
+ if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
mad_send_wr->timeout =
msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
return IB_RMPP_RESULT_PROCESSED; /* Send done */
}
- if (mad_send_wr->seg_num > mad_send_wr->newwin ||
- mad_send_wr->seg_num > mad_send_wr->total_seg)
+ if (mad_send_wr->seg_num == mad_send_wr->newwin ||
+ mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count)
return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */
ret = send_next_seg(mad_send_wr);
@@ -921,10 +867,12 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr)
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
- if (mad_send_wr->last_ack == mad_send_wr->total_seg)
+ if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count)
return IB_RMPP_RESULT_PROCESSED;
- mad_send_wr->seg_num = mad_send_wr->last_ack + 1;
+ mad_send_wr->seg_num = mad_send_wr->last_ack;
+ mad_send_wr->cur_seg = mad_send_wr->last_ack_seg;
+
ret = send_next_seg(mad_send_wr);
if (ret)
return IB_RMPP_RESULT_PROCESSED;
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 2b3c40198f81..3011bfd86dc5 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -49,19 +49,16 @@ extern int smi_check_forward_dr_smp(struct ib_smp *smp);
extern int smi_handle_dr_smp_send(struct ib_smp *smp,
u8 node_type,
int port_num);
-extern int smi_check_local_dr_smp(struct ib_smp *smp,
- struct ib_device *device,
- int port_num);
/*
* Return 1 if the SMP should be handled by the local SMA/SM via process_mad
*/
-static inline int smi_check_local_smp(struct ib_mad_agent *mad_agent,
- struct ib_smp *smp)
+static inline int smi_check_local_smp(struct ib_smp *smp,
+ struct ib_device *device)
{
/* C14-9:3 -- We're at the end of the DR segment of path */
/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
- return ((mad_agent->device->process_mad &&
+ return ((device->process_mad &&
!ib_get_smp_direction(smp) &&
(smp->hop_ptr == smp->hop_cnt + 1)));
}
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 5982d687a000..15121cb5a1f6 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -112,7 +112,7 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
return ret;
return sprintf(buf, "%d: %s\n", attr.state,
- attr.state >= 0 && attr.state <= ARRAY_SIZE(state_name) ?
+ attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
state_name[attr.state] : "UNKNOWN");
}
@@ -472,8 +472,10 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *,
goto err;
if (snprintf(element->name, sizeof(element->name),
- "%d", i) >= sizeof(element->name))
+ "%d", i) >= sizeof(element->name)) {
+ kfree(element);
goto err;
+ }
element->attr.attr.name = element->name;
element->attr.attr.mode = S_IRUGO;
@@ -628,14 +630,42 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf)
be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
}
+static ssize_t show_node_desc(struct class_device *cdev, char *buf)
+{
+ struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
+
+ return sprintf(buf, "%.64s\n", dev->node_desc);
+}
+
+static ssize_t set_node_desc(struct class_device *cdev, const char *buf,
+ size_t count)
+{
+ struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
+ struct ib_device_modify desc = {};
+ int ret;
+
+ if (!dev->modify_device)
+ return -EIO;
+
+ memcpy(desc.node_desc, buf, min_t(int, count, 64));
+ ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
static CLASS_DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
static CLASS_DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
static CLASS_DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
+static CLASS_DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc,
+ set_node_desc);
static struct class_device_attribute *ib_class_attributes[] = {
&class_device_attr_node_type,
&class_device_attr_sys_image_guid,
- &class_device_attr_node_guid
+ &class_device_attr_node_guid,
+ &class_device_attr_node_desc
};
static struct class ib_class = {
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index c908de8db5a9..fb6cd42601f9 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -31,7 +31,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: user_mad.c 4010 2005-11-09 23:11:56Z roland $
+ * $Id: user_mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
*/
#include <linux/module.h>
@@ -121,6 +121,7 @@ struct ib_umad_file {
struct ib_umad_packet {
struct ib_mad_send_buf *msg;
+ struct ib_mad_recv_wc *recv_wc;
struct list_head list;
int length;
struct ib_user_mad mad;
@@ -176,31 +177,32 @@ static int queue_packet(struct ib_umad_file *file,
return ret;
}
+static int data_offset(u8 mgmt_class)
+{
+ if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
+ return IB_MGMT_SA_HDR;
+ else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
+ (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
+ return IB_MGMT_VENDOR_HDR;
+ else
+ return IB_MGMT_RMPP_HDR;
+}
+
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *send_wc)
{
struct ib_umad_file *file = agent->context;
- struct ib_umad_packet *timeout;
struct ib_umad_packet *packet = send_wc->send_buf->context[0];
ib_destroy_ah(packet->msg->ah);
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
- timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL);
- if (!timeout)
- goto out;
-
- timeout->length = IB_MGMT_MAD_HDR;
- timeout->mad.hdr.id = packet->mad.hdr.id;
- timeout->mad.hdr.status = ETIMEDOUT;
- memcpy(timeout->mad.data, packet->mad.data,
- sizeof (struct ib_mad_hdr));
-
- if (queue_packet(file, agent, timeout))
- kfree(timeout);
+ packet->length = IB_MGMT_MAD_HDR;
+ packet->mad.hdr.status = ETIMEDOUT;
+ if (!queue_packet(file, agent, packet))
+ return;
}
-out:
kfree(packet);
}
@@ -209,22 +211,20 @@ static void recv_handler(struct ib_mad_agent *agent,
{
struct ib_umad_file *file = agent->context;
struct ib_umad_packet *packet;
- int length;
if (mad_recv_wc->wc->status != IB_WC_SUCCESS)
- goto out;
+ goto err1;
- length = mad_recv_wc->mad_len;
- packet = kzalloc(sizeof *packet + length, GFP_KERNEL);
+ packet = kzalloc(sizeof *packet, GFP_KERNEL);
if (!packet)
- goto out;
+ goto err1;
- packet->length = length;
-
- ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data);
+ packet->length = mad_recv_wc->mad_len;
+ packet->recv_wc = mad_recv_wc;
packet->mad.hdr.status = 0;
- packet->mad.hdr.length = length + sizeof (struct ib_user_mad);
+ packet->mad.hdr.length = sizeof (struct ib_user_mad) +
+ mad_recv_wc->mad_len;
packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
packet->mad.hdr.sl = mad_recv_wc->wc->sl;
@@ -240,12 +240,79 @@ static void recv_handler(struct ib_mad_agent *agent,
}
if (queue_packet(file, agent, packet))
- kfree(packet);
+ goto err2;
+ return;
-out:
+err2:
+ kfree(packet);
+err1:
ib_free_recv_mad(mad_recv_wc);
}
+static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
+ size_t count)
+{
+ struct ib_mad_recv_buf *recv_buf;
+ int left, seg_payload, offset, max_seg_payload;
+
+ /* We need enough room to copy the first (or only) MAD segment. */
+ recv_buf = &packet->recv_wc->recv_buf;
+ if ((packet->length <= sizeof (*recv_buf->mad) &&
+ count < sizeof (packet->mad) + packet->length) ||
+ (packet->length > sizeof (*recv_buf->mad) &&
+ count < sizeof (packet->mad) + sizeof (*recv_buf->mad)))
+ return -EINVAL;
+
+ if (copy_to_user(buf, &packet->mad, sizeof (packet->mad)))
+ return -EFAULT;
+
+ buf += sizeof (packet->mad);
+ seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
+ if (copy_to_user(buf, recv_buf->mad, seg_payload))
+ return -EFAULT;
+
+ if (seg_payload < packet->length) {
+ /*
+ * Multipacket RMPP MAD message. Copy remainder of message.
+ * Note that last segment may have a shorter payload.
+ */
+ if (count < sizeof (packet->mad) + packet->length) {
+ /*
+ * The buffer is too small, return the first RMPP segment,
+ * which includes the RMPP message length.
+ */
+ return -ENOSPC;
+ }
+ offset = data_offset(recv_buf->mad->mad_hdr.mgmt_class);
+ max_seg_payload = sizeof (struct ib_mad) - offset;
+
+ for (left = packet->length - seg_payload, buf += seg_payload;
+ left; left -= seg_payload, buf += seg_payload) {
+ recv_buf = container_of(recv_buf->list.next,
+ struct ib_mad_recv_buf, list);
+ seg_payload = min(left, max_seg_payload);
+ if (copy_to_user(buf, ((void *) recv_buf->mad) + offset,
+ seg_payload))
+ return -EFAULT;
+ }
+ }
+ return sizeof (packet->mad) + packet->length;
+}
+
+static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet,
+ size_t count)
+{
+ ssize_t size = sizeof (packet->mad) + packet->length;
+
+ if (count < size)
+ return -EINVAL;
+
+ if (copy_to_user(buf, &packet->mad, size))
+ return -EFAULT;
+
+ return size;
+}
+
static ssize_t ib_umad_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
@@ -253,7 +320,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
struct ib_umad_packet *packet;
ssize_t ret;
- if (count < sizeof (struct ib_user_mad) + sizeof (struct ib_mad))
+ if (count < sizeof (struct ib_user_mad))
return -EINVAL;
spin_lock_irq(&file->recv_lock);
@@ -276,28 +343,44 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
spin_unlock_irq(&file->recv_lock);
- if (count < packet->length + sizeof (struct ib_user_mad)) {
- /* Return length needed (and first RMPP segment) if too small */
- if (copy_to_user(buf, &packet->mad,
- sizeof (struct ib_user_mad) + sizeof (struct ib_mad)))
- ret = -EFAULT;
- else
- ret = -ENOSPC;
- } else if (copy_to_user(buf, &packet->mad,
- packet->length + sizeof (struct ib_user_mad)))
- ret = -EFAULT;
+ if (packet->recv_wc)
+ ret = copy_recv_mad(buf, packet, count);
else
- ret = packet->length + sizeof (struct ib_user_mad);
+ ret = copy_send_mad(buf, packet, count);
+
if (ret < 0) {
/* Requeue packet */
spin_lock_irq(&file->recv_lock);
list_add(&packet->list, &file->recv_list);
spin_unlock_irq(&file->recv_lock);
- } else
+ } else {
+ if (packet->recv_wc)
+ ib_free_recv_mad(packet->recv_wc);
kfree(packet);
+ }
return ret;
}
+static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf)
+{
+ int left, seg;
+
+ /* Copy class specific header */
+ if ((msg->hdr_len > IB_MGMT_RMPP_HDR) &&
+ copy_from_user(msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR,
+ msg->hdr_len - IB_MGMT_RMPP_HDR))
+ return -EFAULT;
+
+ /* All headers are in place. Copy data segments. */
+ for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0;
+ seg++, left -= msg->seg_size, buf += msg->seg_size) {
+ if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf,
+ min(left, msg->seg_size)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
@@ -309,14 +392,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
struct ib_rmpp_mad *rmpp_mad;
u8 method;
__be64 *tid;
- int ret, length, hdr_len, copy_offset;
- int rmpp_active, has_rmpp_header;
+ int ret, data_len, hdr_len, copy_offset, rmpp_active;
if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
return -EINVAL;
- length = count - sizeof (struct ib_user_mad);
- packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
+ packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
if (!packet)
return -ENOMEM;
@@ -363,35 +444,25 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
hdr_len = IB_MGMT_SA_HDR;
copy_offset = IB_MGMT_RMPP_HDR;
- has_rmpp_header = 1;
+ rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+ IB_MGMT_RMPP_FLAG_ACTIVE;
} else if (rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START &&
rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END) {
- hdr_len = IB_MGMT_VENDOR_HDR;
- copy_offset = IB_MGMT_RMPP_HDR;
- has_rmpp_header = 1;
+ hdr_len = IB_MGMT_VENDOR_HDR;
+ copy_offset = IB_MGMT_RMPP_HDR;
+ rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+ IB_MGMT_RMPP_FLAG_ACTIVE;
} else {
hdr_len = IB_MGMT_MAD_HDR;
copy_offset = IB_MGMT_MAD_HDR;
- has_rmpp_header = 0;
- }
-
- if (has_rmpp_header)
- rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
- IB_MGMT_RMPP_FLAG_ACTIVE;
- else
rmpp_active = 0;
-
- /* Validate that the management class can support RMPP */
- if (rmpp_active && !agent->rmpp_version) {
- ret = -EINVAL;
- goto err_ah;
}
+ data_len = count - sizeof (struct ib_user_mad) - hdr_len;
packet->msg = ib_create_send_mad(agent,
be32_to_cpu(packet->mad.hdr.qpn),
- 0, rmpp_active,
- hdr_len, length - hdr_len,
- GFP_KERNEL);
+ 0, rmpp_active, hdr_len,
+ data_len, GFP_KERNEL);
if (IS_ERR(packet->msg)) {
ret = PTR_ERR(packet->msg);
goto err_ah;
@@ -402,14 +473,21 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
packet->msg->retries = packet->mad.hdr.retries;
packet->msg->context[0] = packet;
- /* Copy MAD headers (RMPP header in place) */
+ /* Copy MAD header. Any RMPP header is already in place. */
memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
- /* Now, copy rest of message from user into send buffer */
- if (copy_from_user(packet->msg->mad + copy_offset,
- buf + sizeof (struct ib_user_mad) + copy_offset,
- length - copy_offset)) {
- ret = -EFAULT;
- goto err_msg;
+ buf += sizeof (struct ib_user_mad);
+
+ if (!rmpp_active) {
+ if (copy_from_user(packet->msg->mad + copy_offset,
+ buf + copy_offset,
+ hdr_len + data_len - copy_offset)) {
+ ret = -EFAULT;
+ goto err_msg;
+ }
+ } else {
+ ret = copy_rmpp_mad(packet->msg, buf);
+ if (ret)
+ goto err_msg;
}
/*
@@ -433,18 +511,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err_msg;
up_read(&file->port->mutex);
-
return count;
err_msg:
ib_free_send_mad(packet->msg);
-
err_ah:
ib_destroy_ah(ah);
-
err_up:
up_read(&file->port->mutex);
-
err:
kfree(packet);
return ret;
@@ -627,8 +701,11 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
already_dead = file->agents_dead;
file->agents_dead = 1;
- list_for_each_entry_safe(packet, tmp, &file->recv_list, list)
+ list_for_each_entry_safe(packet, tmp, &file->recv_list, list) {
+ if (packet->recv_wc)
+ ib_free_recv_mad(packet->recv_wc);
kfree(packet);
+ }
list_del(&file->port_list);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index f7eecbc6af6c..3372d67ff139 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
@@ -178,10 +178,12 @@ IB_UVERBS_DECLARE_CMD(reg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
IB_UVERBS_DECLARE_CMD(create_comp_channel);
IB_UVERBS_DECLARE_CMD(create_cq);
+IB_UVERBS_DECLARE_CMD(resize_cq);
IB_UVERBS_DECLARE_CMD(poll_cq);
IB_UVERBS_DECLARE_CMD(req_notify_cq);
IB_UVERBS_DECLARE_CMD(destroy_cq);
IB_UVERBS_DECLARE_CMD(create_qp);
+IB_UVERBS_DECLARE_CMD(query_qp);
IB_UVERBS_DECLARE_CMD(modify_qp);
IB_UVERBS_DECLARE_CMD(destroy_qp);
IB_UVERBS_DECLARE_CMD(post_send);
@@ -193,6 +195,7 @@ IB_UVERBS_DECLARE_CMD(attach_mcast);
IB_UVERBS_DECLARE_CMD(detach_mcast);
IB_UVERBS_DECLARE_CMD(create_srq);
IB_UVERBS_DECLARE_CMD(modify_srq);
+IB_UVERBS_DECLARE_CMD(query_srq);
IB_UVERBS_DECLARE_CMD(destroy_srq);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 407b6284d7d5..9f69bd48eb1b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,7 +1,8 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ * Copyright (c) 2006 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -675,6 +676,46 @@ err:
return ret;
}
+ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_resize_cq cmd;
+ struct ib_uverbs_resize_cq_resp resp;
+ struct ib_udata udata;
+ struct ib_cq *cq;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ mutex_lock(&ib_uverbs_idr_mutex);
+
+ cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+ if (!cq || cq->uobject->context != file->ucontext || !cq->device->resize_cq)
+ goto out;
+
+ ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
+ if (ret)
+ goto out;
+
+ memset(&resp, 0, sizeof resp);
+ resp.cqe = cq->cqe;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ mutex_unlock(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -956,6 +997,106 @@ err_up:
return ret;
}
+ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_query_qp cmd;
+ struct ib_uverbs_query_qp_resp resp;
+ struct ib_qp *qp;
+ struct ib_qp_attr *attr;
+ struct ib_qp_init_attr *init_attr;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
+ if (!attr || !init_attr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_lock(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (qp && qp->uobject->context == file->ucontext)
+ ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
+ else
+ ret = -EINVAL;
+
+ mutex_unlock(&ib_uverbs_idr_mutex);
+
+ if (ret)
+ goto out;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.qp_state = attr->qp_state;
+ resp.cur_qp_state = attr->cur_qp_state;
+ resp.path_mtu = attr->path_mtu;
+ resp.path_mig_state = attr->path_mig_state;
+ resp.qkey = attr->qkey;
+ resp.rq_psn = attr->rq_psn;
+ resp.sq_psn = attr->sq_psn;
+ resp.dest_qp_num = attr->dest_qp_num;
+ resp.qp_access_flags = attr->qp_access_flags;
+ resp.pkey_index = attr->pkey_index;
+ resp.alt_pkey_index = attr->alt_pkey_index;
+ resp.en_sqd_async_notify = attr->en_sqd_async_notify;
+ resp.max_rd_atomic = attr->max_rd_atomic;
+ resp.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+ resp.min_rnr_timer = attr->min_rnr_timer;
+ resp.port_num = attr->port_num;
+ resp.timeout = attr->timeout;
+ resp.retry_cnt = attr->retry_cnt;
+ resp.rnr_retry = attr->rnr_retry;
+ resp.alt_port_num = attr->alt_port_num;
+ resp.alt_timeout = attr->alt_timeout;
+
+ memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
+ resp.dest.flow_label = attr->ah_attr.grh.flow_label;
+ resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
+ resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
+ resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
+ resp.dest.dlid = attr->ah_attr.dlid;
+ resp.dest.sl = attr->ah_attr.sl;
+ resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
+ resp.dest.static_rate = attr->ah_attr.static_rate;
+ resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
+ resp.dest.port_num = attr->ah_attr.port_num;
+
+ memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
+ resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
+ resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
+ resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
+ resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
+ resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
+ resp.alt_dest.sl = attr->alt_ah_attr.sl;
+ resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
+ resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
+ resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
+ resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
+
+ resp.max_send_wr = init_attr->cap.max_send_wr;
+ resp.max_recv_wr = init_attr->cap.max_recv_wr;
+ resp.max_send_sge = init_attr->cap.max_send_sge;
+ resp.max_recv_sge = init_attr->cap.max_recv_sge;
+ resp.max_inline_data = init_attr->cap.max_inline_data;
+ resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ kfree(attr);
+ kfree(init_attr);
+
+ return ret ? ret : in_len;
+}
+
ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -990,7 +1131,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
attr->dest_qp_num = cmd.dest_qp_num;
attr->qp_access_flags = cmd.qp_access_flags;
attr->pkey_index = cmd.pkey_index;
- attr->alt_pkey_index = cmd.pkey_index;
+ attr->alt_pkey_index = cmd.alt_pkey_index;
attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
attr->max_rd_atomic = cmd.max_rd_atomic;
attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
@@ -1094,8 +1235,8 @@ out:
}
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_post_send cmd;
struct ib_uverbs_post_send_resp resp;
@@ -1323,8 +1464,8 @@ err:
}
ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_post_recv cmd;
struct ib_uverbs_post_recv_resp resp;
@@ -1374,8 +1515,8 @@ out:
}
ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_post_srq_recv cmd;
struct ib_uverbs_post_srq_recv_resp resp;
@@ -1723,6 +1864,8 @@ retry:
goto err_destroy;
resp.srq_handle = uobj->uobject.id;
+ resp.max_wr = attr.attr.max_wr;
+ resp.max_sge = attr.attr.max_sge;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
@@ -1783,6 +1926,49 @@ out:
return ret ? ret : in_len;
}
+ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_srq cmd;
+ struct ib_uverbs_query_srq_resp resp;
+ struct ib_srq_attr attr;
+ struct ib_srq *srq;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ mutex_lock(&ib_uverbs_idr_mutex);
+
+ srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
+ if (srq && srq->uobject->context == file->ucontext)
+ ret = ib_query_srq(srq, &attr);
+ else
+ ret = -EINVAL;
+
+ mutex_unlock(&ib_uverbs_idr_mutex);
+
+ if (ret)
+ goto out;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.max_wr = attr.max_wr;
+ resp.max_sge = attr.max_sge;
+ resp.srq_limit = attr.srq_limit;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ return ret ? ret : in_len;
+}
+
ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 903f85a4bc0c..ff092a0a94da 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
@@ -91,10 +91,12 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
[IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
+ [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
[IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
[IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
[IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
+ [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
[IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
[IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
[IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
@@ -106,6 +108,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
[IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
[IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
+ [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
[IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
};
@@ -461,7 +464,6 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
event->event, &uobj->async_list,
&uobj->async_events_reported);
-
}
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c857361be449..cae0845f472a 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -5,7 +5,7 @@
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -245,6 +245,258 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
}
EXPORT_SYMBOL(ib_create_qp);
+static const struct {
+ int valid;
+ enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETY + 1];
+ enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETY + 1];
+} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .req_param = {
+ [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ }
+ },
+ },
+ [IB_QPS_INIT] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ }
+ },
+ [IB_QPS_RTR] = {
+ .valid = 1,
+ .req_param = {
+ [IB_QPT_UC] = (IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN),
+ [IB_QPT_RC] = (IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_MIN_RNR_TIMER),
+ },
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
+ [IB_QPT_RC] = (IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
+ [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_RTR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .req_param = {
+ [IB_QPT_UD] = IB_QP_SQ_PSN,
+ [IB_QPT_UC] = IB_QP_SQ_PSN,
+ [IB_QPT_RC] = (IB_QP_TIMEOUT |
+ IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_SQ_PSN |
+ IB_QP_MAX_QP_RD_ATOMIC),
+ [IB_QPT_SMI] = IB_QP_SQ_PSN,
+ [IB_QPT_GSI] = IB_QP_SQ_PSN,
+ },
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_RC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_SMI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_RTS] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_CUR_STATE |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_ALT_PATH |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_RC] = (IB_QP_CUR_STATE |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_ALT_PATH |
+ IB_QP_PATH_MIG_STATE |
+ IB_QP_MIN_RNR_TIMER),
+ [IB_QPT_SMI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
+ }
+ },
+ },
+ [IB_QPS_SQD] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_RC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_SMI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_AV |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_RC] = (IB_QP_PORT |
+ IB_QP_AV |
+ IB_QP_TIMEOUT |
+ IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_MAX_QP_RD_ATOMIC |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_SQE] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = {
+ [IB_QPT_UD] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_CUR_STATE |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_SMI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_CUR_STATE |
+ IB_QP_QKEY),
+ }
+ }
+ },
+ [IB_QPS_ERR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 }
+ }
+};
+
+int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+ enum ib_qp_type type, enum ib_qp_attr_mask mask)
+{
+ enum ib_qp_attr_mask req_param, opt_param;
+
+ if (cur_state < 0 || cur_state > IB_QPS_ERR ||
+ next_state < 0 || next_state > IB_QPS_ERR)
+ return 0;
+
+ if (mask & IB_QP_CUR_STATE &&
+ cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
+ cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
+ return 0;
+
+ if (!qp_state_table[cur_state][next_state].valid)
+ return 0;
+
+ req_param = qp_state_table[cur_state][next_state].req_param[type];
+ opt_param = qp_state_table[cur_state][next_state].opt_param[type];
+
+ if ((mask & req_param) != req_param)
+ return 0;
+
+ if (mask & ~(req_param | opt_param | IB_QP_STATE))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(ib_modify_qp_is_ok);
+
int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
@@ -322,11 +574,10 @@ int ib_destroy_cq(struct ib_cq *cq)
}
EXPORT_SYMBOL(ib_destroy_cq);
-int ib_resize_cq(struct ib_cq *cq,
- int cqe)
+int ib_resize_cq(struct ib_cq *cq, int cqe)
{
return cq->device->resize_cq ?
- cq->device->resize_cq(cq, cqe) : -ENOSYS;
+ cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
}
EXPORT_SYMBOL(ib_resize_cq);
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index a19e0ed03d7c..f023d3936518 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -147,7 +147,7 @@ int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
switch (ah->type) {
case MTHCA_AH_ON_HCA:
mthca_free(&dev->av_table.alloc,
- (ah->avdma - dev->av_table.ddr_av_base) /
+ (ah->avdma - dev->av_table.ddr_av_base) /
MTHCA_AV_SIZE);
break;
@@ -193,6 +193,37 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
return 0;
}
+int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr)
+{
+ struct mthca_ah *ah = to_mah(ibah);
+ struct mthca_dev *dev = to_mdev(ibah->device);
+
+ /* Only implement for MAD and memfree ah for now. */
+ if (ah->type == MTHCA_AH_ON_HCA)
+ return -ENOSYS;
+
+ memset(attr, 0, sizeof *attr);
+ attr->dlid = be16_to_cpu(ah->av->dlid);
+ attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
+ attr->static_rate = ah->av->msg_sr & 0x7;
+ attr->src_path_bits = ah->av->g_slid & 0x7F;
+ attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24;
+ attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0;
+
+ if (attr->ah_flags) {
+ attr->grh.traffic_class =
+ be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20;
+ attr->grh.flow_label =
+ be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff;
+ attr->grh.hop_limit = ah->av->hop_limit;
+ attr->grh.sgid_index = ah->av->gid_index &
+ (dev->limits.gid_table_len - 1);
+ memcpy(attr->grh.dgid.raw, ah->av->dgid, 16);
+ }
+
+ return 0;
+}
+
int __devinit mthca_init_av_table(struct mthca_dev *dev)
{
int err;
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 2825615ce81c..343eca507870 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -182,25 +182,58 @@ struct mthca_cmd_context {
u8 status;
};
+static int fw_cmd_doorbell = 1;
+module_param(fw_cmd_doorbell, int, 0644);
+MODULE_PARM_DESC(fw_cmd_doorbell, "post FW commands through doorbell page if nonzero "
+ "(and supported by FW)");
+
static inline int go_bit(struct mthca_dev *dev)
{
return readl(dev->hcr + HCR_STATUS_OFFSET) &
swab32(1 << HCR_GO_BIT);
}
-static int mthca_cmd_post(struct mthca_dev *dev,
- u64 in_param,
- u64 out_param,
- u32 in_modifier,
- u8 op_modifier,
- u16 op,
- u16 token,
- int event)
+static void mthca_cmd_post_dbell(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ u16 token)
{
- int err = 0;
+ void __iomem *ptr = dev->cmd.dbell_map;
+ u16 *offs = dev->cmd.dbell_offsets;
- mutex_lock(&dev->cmd.hcr_mutex);
+ __raw_writel((__force u32) cpu_to_be32(in_param >> 32), ptr + offs[0]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), ptr + offs[1]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(in_modifier), ptr + offs[2]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(out_param >> 32), ptr + offs[3]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), ptr + offs[4]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(token << 16), ptr + offs[5]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) |
+ (1 << HCA_E_BIT) |
+ (op_modifier << HCR_OPMOD_SHIFT) |
+ op), ptr + offs[6]);
+ wmb();
+ __raw_writel((__force u32) 0, ptr + offs[7]);
+ wmb();
+}
+static int mthca_cmd_post_hcr(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ u16 token,
+ int event)
+{
if (event) {
unsigned long end = jiffies + GO_BIT_TIMEOUT;
@@ -210,10 +243,8 @@ static int mthca_cmd_post(struct mthca_dev *dev,
}
}
- if (go_bit(dev)) {
- err = -EAGAIN;
- goto out;
- }
+ if (go_bit(dev))
+ return -EAGAIN;
/*
* We use writel (instead of something like memcpy_toio)
@@ -236,7 +267,29 @@ static int mthca_cmd_post(struct mthca_dev *dev,
(op_modifier << HCR_OPMOD_SHIFT) |
op), dev->hcr + 6 * 4);
-out:
+ return 0;
+}
+
+static int mthca_cmd_post(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ u16 token,
+ int event)
+{
+ int err = 0;
+
+ mutex_lock(&dev->cmd.hcr_mutex);
+
+ if (event && dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS && fw_cmd_doorbell)
+ mthca_cmd_post_dbell(dev, in_param, out_param, in_modifier,
+ op_modifier, op, token);
+ else
+ err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier,
+ op_modifier, op, token, event);
+
mutex_unlock(&dev->cmd.hcr_mutex);
return err;
}
@@ -275,7 +328,7 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
}
if (out_is_imm)
- *out_param =
+ *out_param =
(u64) be32_to_cpu((__force __be32)
__raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
(u64) be32_to_cpu((__force __be32)
@@ -386,7 +439,7 @@ static int mthca_cmd_box(struct mthca_dev *dev,
unsigned long timeout,
u8 *status)
{
- if (dev->cmd.use_events)
+ if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
return mthca_cmd_wait(dev, in_param, &out_param, 0,
in_modifier, op_modifier, op,
timeout, status);
@@ -423,7 +476,7 @@ static int mthca_cmd_imm(struct mthca_dev *dev,
unsigned long timeout,
u8 *status)
{
- if (dev->cmd.use_events)
+ if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
return mthca_cmd_wait(dev, in_param, out_param, 1,
in_modifier, op_modifier, op,
timeout, status);
@@ -437,7 +490,7 @@ int mthca_cmd_init(struct mthca_dev *dev)
{
mutex_init(&dev->cmd.hcr_mutex);
sema_init(&dev->cmd.poll_sem, 1);
- dev->cmd.use_events = 0;
+ dev->cmd.flags = 0;
dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE,
MTHCA_HCR_SIZE);
@@ -461,6 +514,8 @@ void mthca_cmd_cleanup(struct mthca_dev *dev)
{
pci_pool_destroy(dev->cmd.pool);
iounmap(dev->hcr);
+ if (dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS)
+ iounmap(dev->cmd.dbell_map);
}
/*
@@ -498,7 +553,8 @@ int mthca_cmd_use_events(struct mthca_dev *dev)
; /* nothing */
--dev->cmd.token_mask;
- dev->cmd.use_events = 1;
+ dev->cmd.flags |= MTHCA_CMD_USE_EVENTS;
+
down(&dev->cmd.poll_sem);
return 0;
@@ -511,7 +567,7 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
{
int i;
- dev->cmd.use_events = 0;
+ dev->cmd.flags &= ~MTHCA_CMD_USE_EVENTS;
for (i = 0; i < dev->cmd.max_cmds; ++i)
down(&dev->cmd.event_sem);
@@ -596,8 +652,9 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
* address or size and use that as our log2 size.
*/
lg = ffs(mthca_icm_addr(&iter) | mthca_icm_size(&iter)) - 1;
- if (lg < 12) {
- mthca_warn(dev, "Got FW area not aligned to 4K (%llx/%lx).\n",
+ if (lg < MTHCA_ICM_PAGE_SHIFT) {
+ mthca_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n",
+ MTHCA_ICM_PAGE_SIZE,
(unsigned long long) mthca_icm_addr(&iter),
mthca_icm_size(&iter));
err = -EINVAL;
@@ -609,8 +666,9 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
virt += 1 << lg;
}
- pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) +
- (i << lg)) | (lg - 12));
+ pages[nent * 2 + 1] =
+ cpu_to_be64((mthca_icm_addr(&iter) + (i << lg)) |
+ (lg - MTHCA_ICM_PAGE_SHIFT));
ts += 1 << (lg - 10);
++tc;
@@ -661,12 +719,41 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status);
}
+static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base)
+{
+ unsigned long addr;
+ u16 max_off = 0;
+ int i;
+
+ for (i = 0; i < 8; ++i)
+ max_off = max(max_off, dev->cmd.dbell_offsets[i]);
+
+ if ((base & PAGE_MASK) != ((base + max_off) & PAGE_MASK)) {
+ mthca_warn(dev, "Firmware doorbell region at 0x%016llx, "
+ "length 0x%x crosses a page boundary\n",
+ (unsigned long long) base, max_off);
+ return;
+ }
+
+ addr = pci_resource_start(dev->pdev, 2) +
+ ((pci_resource_len(dev->pdev, 2) - 1) & base);
+ dev->cmd.dbell_map = ioremap(addr, max_off + sizeof(u32));
+ if (!dev->cmd.dbell_map)
+ return;
+
+ dev->cmd.flags |= MTHCA_CMD_POST_DOORBELLS;
+ mthca_dbg(dev, "Mapped doorbell page for posting FW commands\n");
+}
+
int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
{
struct mthca_mailbox *mailbox;
u32 *outbox;
+ u64 base;
+ u32 tmp;
int err = 0;
u8 lg;
+ int i;
#define QUERY_FW_OUT_SIZE 0x100
#define QUERY_FW_VER_OFFSET 0x00
@@ -674,6 +761,10 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
#define QUERY_FW_ERR_START_OFFSET 0x30
#define QUERY_FW_ERR_SIZE_OFFSET 0x38
+#define QUERY_FW_CMD_DB_EN_OFFSET 0x10
+#define QUERY_FW_CMD_DB_OFFSET 0x50
+#define QUERY_FW_CMD_DB_BASE 0x60
+
#define QUERY_FW_START_OFFSET 0x20
#define QUERY_FW_END_OFFSET 0x28
@@ -702,16 +793,29 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
((dev->fw_ver & 0xffff0000ull) >> 16) |
((dev->fw_ver & 0x0000ffffull) << 16);
+ mthca_dbg(dev, "FW version %012llx, max commands %d\n",
+ (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
+
MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
dev->cmd.max_cmds = 1 << lg;
MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET);
MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
- mthca_dbg(dev, "FW version %012llx, max commands %d\n",
- (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n",
(unsigned long long) dev->catas_err.addr, dev->catas_err.size);
+ MTHCA_GET(tmp, outbox, QUERY_FW_CMD_DB_EN_OFFSET);
+ if (tmp & 0x1) {
+ mthca_dbg(dev, "FW supports commands through doorbells\n");
+
+ MTHCA_GET(base, outbox, QUERY_FW_CMD_DB_BASE);
+ for (i = 0; i < MTHCA_CMD_NUM_DBELL_DWORDS; ++i)
+ MTHCA_GET(dev->cmd.dbell_offsets[i], outbox,
+ QUERY_FW_CMD_DB_OFFSET + (i << 1));
+
+ mthca_setup_cmd_doorbells(dev, base);
+ }
+
if (mthca_is_memfree(dev)) {
MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET);
@@ -720,12 +824,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2);
/*
- * Arbel page size is always 4 KB; round up number of
- * system pages needed.
+ * Round up number of system pages needed in case
+ * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE.
*/
dev->fw.arbel.fw_pages =
- ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE >> 12) >>
- (PAGE_SHIFT - 12);
+ ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >>
+ (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT);
mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n",
(unsigned long long) dev->fw.arbel.clr_int_base,
@@ -1173,7 +1277,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
int err;
#define INIT_HCA_IN_SIZE 0x200
-#define INIT_HCA_FLAGS_OFFSET 0x014
+#define INIT_HCA_FLAGS1_OFFSET 0x00c
+#define INIT_HCA_FLAGS2_OFFSET 0x014
#define INIT_HCA_QPC_OFFSET 0x020
#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
@@ -1216,15 +1321,18 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
memset(inbox, 0, INIT_HCA_IN_SIZE);
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET);
+
#if defined(__LITTLE_ENDIAN)
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
#elif defined(__BIG_ENDIAN)
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1);
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1 << 1);
#else
#error Host endianness not defined
#endif
/* Check port for UD address vector: */
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1);
/* We leave wqe_quota, responder_exu, etc as 0 (default) */
@@ -1438,11 +1546,11 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
return ret;
/*
- * Arbel page size is always 4 KB; round up number of system
- * pages needed.
+ * Round up number of system pages needed in case
+ * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE.
*/
- *aux_pages = (*aux_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> (PAGE_SHIFT - 12);
- *aux_pages = ALIGN(*aux_pages, PAGE_SIZE >> 12) >> (PAGE_SHIFT - 12);
+ *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >>
+ (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT);
return 0;
}
@@ -1514,6 +1622,37 @@ int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
CMD_TIME_CLASS_A, status);
}
+int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
+ u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ __be32 *inbox;
+ int err;
+
+#define RESIZE_CQ_IN_SIZE 0x40
+#define RESIZE_CQ_LOG_SIZE_OFFSET 0x0c
+#define RESIZE_CQ_LKEY_OFFSET 0x1c
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ memset(inbox, 0, RESIZE_CQ_IN_SIZE);
+ /*
+ * Leave start address fields zeroed out -- mthca assumes that
+ * MRs for CQs always start at virtual address 0.
+ */
+ MTHCA_PUT(inbox, log_size, RESIZE_CQ_LOG_SIZE_OFFSET);
+ MTHCA_PUT(inbox, lkey, RESIZE_CQ_LKEY_OFFSET);
+
+ err = mthca_cmd(dev, mailbox->dma, cq_num, 1, CMD_RESIZE_CQ,
+ CMD_TIME_CLASS_B, status);
+
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int srq_num, u8 *status)
{
@@ -1529,37 +1668,69 @@ int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
CMD_TIME_CLASS_A, status);
}
+int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
+ struct mthca_mailbox *mailbox, u8 *status)
+{
+ return mthca_cmd_box(dev, 0, mailbox->dma, num, 0,
+ CMD_QUERY_SRQ, CMD_TIME_CLASS_A, status);
+}
+
int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status)
{
return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ,
CMD_TIME_CLASS_B, status);
}
-int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
- int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
+int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
+ enum ib_qp_state next, u32 num, int is_ee,
+ struct mthca_mailbox *mailbox, u32 optmask,
u8 *status)
{
- static const u16 op[] = {
- [MTHCA_TRANS_RST2INIT] = CMD_RST2INIT_QPEE,
- [MTHCA_TRANS_INIT2INIT] = CMD_INIT2INIT_QPEE,
- [MTHCA_TRANS_INIT2RTR] = CMD_INIT2RTR_QPEE,
- [MTHCA_TRANS_RTR2RTS] = CMD_RTR2RTS_QPEE,
- [MTHCA_TRANS_RTS2RTS] = CMD_RTS2RTS_QPEE,
- [MTHCA_TRANS_SQERR2RTS] = CMD_SQERR2RTS_QPEE,
- [MTHCA_TRANS_ANY2ERR] = CMD_2ERR_QPEE,
- [MTHCA_TRANS_RTS2SQD] = CMD_RTS2SQD_QPEE,
- [MTHCA_TRANS_SQD2SQD] = CMD_SQD2SQD_QPEE,
- [MTHCA_TRANS_SQD2RTS] = CMD_SQD2RTS_QPEE,
- [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE
+ static const u16 op[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_INIT] = CMD_RST2INIT_QPEE,
+ },
+ [IB_QPS_INIT] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_INIT] = CMD_INIT2INIT_QPEE,
+ [IB_QPS_RTR] = CMD_INIT2RTR_QPEE,
+ },
+ [IB_QPS_RTR] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_RTR2RTS_QPEE,
+ },
+ [IB_QPS_RTS] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_RTS2RTS_QPEE,
+ [IB_QPS_SQD] = CMD_RTS2SQD_QPEE,
+ },
+ [IB_QPS_SQD] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_SQD2RTS_QPEE,
+ [IB_QPS_SQD] = CMD_SQD2SQD_QPEE,
+ },
+ [IB_QPS_SQE] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_SQERR2RTS_QPEE,
+ },
+ [IB_QPS_ERR] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ }
};
+
u8 op_mod = 0;
int my_mailbox = 0;
int err;
- if (trans < 0 || trans >= ARRAY_SIZE(op))
- return -EINVAL;
-
- if (trans == MTHCA_TRANS_ANY2RST) {
+ if (op[cur][next] == CMD_ERR2RST_QPEE) {
op_mod = 3; /* don't write outbox, any->reset */
/* For debugging */
@@ -1571,34 +1742,35 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
} else
mailbox = NULL;
}
- } else {
- if (0) {
+
+ err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
+ (!!is_ee << 24) | num, op_mod,
+ op[cur][next], CMD_TIME_CLASS_C, status);
+
+ if (0 && mailbox) {
int i;
mthca_dbg(dev, "Dumping QP context:\n");
- printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf));
+ printk(" %08x\n", be32_to_cpup(mailbox->buf));
for (i = 0; i < 0x100 / 4; ++i) {
if (i % 8 == 0)
- printk(" [%02x] ", i * 4);
+ printk("[%02x] ", i * 4);
printk(" %08x",
be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
if ((i + 1) % 8 == 0)
printk("\n");
}
}
- }
-
- if (trans == MTHCA_TRANS_ANY2RST) {
- err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
- (!!is_ee << 24) | num, op_mod,
- op[trans], CMD_TIME_CLASS_C, status);
- if (0 && mailbox) {
+ if (my_mailbox)
+ mthca_free_mailbox(dev, mailbox);
+ } else {
+ if (0) {
int i;
mthca_dbg(dev, "Dumping QP context:\n");
- printk(" %08x\n", be32_to_cpup(mailbox->buf));
+ printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf));
for (i = 0; i < 0x100 / 4; ++i) {
if (i % 8 == 0)
- printk("[%02x] ", i * 4);
+ printk(" [%02x] ", i * 4);
printk(" %08x",
be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
if ((i + 1) % 8 == 0)
@@ -1606,12 +1778,9 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
}
}
- } else
- err = mthca_cmd(dev, mailbox->dma, (!!is_ee << 24) | num,
- op_mod, op[trans], CMD_TIME_CLASS_C, status);
-
- if (my_mailbox)
- mthca_free_mailbox(dev, mailbox);
+ err = mthca_cmd(dev, mailbox->dma, optmask | (!!is_ee << 24) | num,
+ op_mod, op[cur][next], CMD_TIME_CLASS_C, status);
+ }
return err;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index 18175bec84c2..e4ec35c40dd3 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -73,9 +74,9 @@ enum {
MTHCA_CMD_STAT_REG_BOUND = 0x21,
/* HCA local attached memory not present: */
MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22,
- /* Bad management packet (silently discarded): */
+ /* Bad management packet (silently discarded): */
MTHCA_CMD_STAT_BAD_PKT = 0x30,
- /* More outstanding CQEs in CQ than new CQ size: */
+ /* More outstanding CQEs in CQ than new CQ size: */
MTHCA_CMD_STAT_BAD_SIZE = 0x40
};
@@ -298,13 +299,18 @@ int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num, u8 *status);
int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num, u8 *status);
+int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
+ u8 *status);
int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int srq_num, u8 *status);
int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int srq_num, u8 *status);
+int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
+ struct mthca_mailbox *mailbox, u8 *status);
int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status);
-int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
- int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
+int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
+ enum ib_qp_state next, u32 num, int is_ee,
+ struct mthca_mailbox *mailbox, u32 optmask,
u8 *status);
int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
struct mthca_mailbox *mailbox, u8 *status);
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 96f1a86bf049..76aabc5bf371 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
@@ -150,24 +150,29 @@ struct mthca_err_cqe {
#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
-static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
+static inline struct mthca_cqe *get_cqe_from_buf(struct mthca_cq_buf *buf,
+ int entry)
{
- if (cq->is_direct)
- return cq->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
+ if (buf->is_direct)
+ return buf->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
else
- return cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
+ return buf->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
+ (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE;
}
-static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i)
+static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
+{
+ return get_cqe_from_buf(&cq->buf, entry);
+}
+
+static inline struct mthca_cqe *cqe_sw(struct mthca_cqe *cqe)
{
- struct mthca_cqe *cqe = get_cqe(cq, i);
return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe;
}
static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
{
- return cqe_sw(cq, cq->cons_index & cq->ibcq.cqe);
+ return cqe_sw(get_cqe(cq, cq->cons_index & cq->ibcq.cqe));
}
static inline void set_cqe_hw(struct mthca_cqe *cqe)
@@ -289,7 +294,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
* from our QP and therefore don't need to be checked.
*/
for (prod_index = cq->cons_index;
- cqe_sw(cq, prod_index & cq->ibcq.cqe);
+ cqe_sw(get_cqe(cq, prod_index & cq->ibcq.cqe));
++prod_index)
if (prod_index == cq->cons_index + cq->ibcq.cqe)
break;
@@ -324,12 +329,58 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
wake_up(&cq->wait);
}
-static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
- struct mthca_qp *qp, int wqe_index, int is_send,
- struct mthca_err_cqe *cqe,
- struct ib_wc *entry, int *free_cqe)
+void mthca_cq_resize_copy_cqes(struct mthca_cq *cq)
+{
+ int i;
+
+ /*
+ * In Tavor mode, the hardware keeps the consumer and producer
+ * indices mod the CQ size. Since we might be making the CQ
+ * bigger, we need to deal with the case where the producer
+ * index wrapped around before the CQ was resized.
+ */
+ if (!mthca_is_memfree(to_mdev(cq->ibcq.device)) &&
+ cq->ibcq.cqe < cq->resize_buf->cqe) {
+ cq->cons_index &= cq->ibcq.cqe;
+ if (cqe_sw(get_cqe(cq, cq->ibcq.cqe)))
+ cq->cons_index -= cq->ibcq.cqe + 1;
+ }
+
+ for (i = cq->cons_index; cqe_sw(get_cqe(cq, i & cq->ibcq.cqe)); ++i)
+ memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
+ i & cq->resize_buf->cqe),
+ get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE);
+}
+
+int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent)
+{
+ int ret;
+ int i;
+
+ ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE,
+ MTHCA_MAX_DIRECT_CQ_SIZE,
+ &buf->queue, &buf->is_direct,
+ &dev->driver_pd, 1, &buf->mr);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < nent; ++i)
+ set_cqe_hw(get_cqe_from_buf(buf, i));
+
+ return 0;
+}
+
+void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe)
+{
+ mthca_buf_free(dev, (cqe + 1) * MTHCA_CQ_ENTRY_SIZE, &buf->queue,
+ buf->is_direct, &buf->mr);
+}
+
+static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
+ struct mthca_qp *qp, int wqe_index, int is_send,
+ struct mthca_err_cqe *cqe,
+ struct ib_wc *entry, int *free_cqe)
{
- int err;
int dbd;
__be32 new_wqe;
@@ -412,11 +463,9 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
* error case, so we don't have to check the doorbell count, etc.
*/
if (mthca_is_memfree(dev))
- return 0;
+ return;
- err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
- if (err)
- return err;
+ mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
/*
* If we're at the end of the WQE chain, or we've used up our
@@ -424,15 +473,13 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
* the next poll operation.
*/
if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
- return 0;
+ return;
cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd);
cqe->wqe = new_wqe;
cqe->syndrome = SYNDROME_WR_FLUSH_ERR;
*free_cqe = 0;
-
- return 0;
}
static inline int mthca_poll_one(struct mthca_dev *dev,
@@ -518,9 +565,9 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
}
if (is_error) {
- err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
- (struct mthca_err_cqe *) cqe,
- entry, &free_cqe);
+ handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
+ (struct mthca_err_cqe *) cqe,
+ entry, &free_cqe);
goto out;
}
@@ -614,11 +661,14 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
spin_lock_irqsave(&cq->lock, flags);
- for (npolled = 0; npolled < num_entries; ++npolled) {
+ npolled = 0;
+repoll:
+ while (npolled < num_entries) {
err = mthca_poll_one(dev, cq, &qp,
&freed, entry + npolled);
if (err)
break;
+ ++npolled;
}
if (freed) {
@@ -626,6 +676,42 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
update_cons_index(dev, cq, freed);
}
+ /*
+ * If a CQ resize is in progress and we discovered that the
+ * old buffer is empty, then peek in the new buffer, and if
+ * it's not empty, switch to the new buffer and continue
+ * polling there.
+ */
+ if (unlikely(err == -EAGAIN && cq->resize_buf &&
+ cq->resize_buf->state == CQ_RESIZE_READY)) {
+ /*
+ * In Tavor mode, the hardware keeps the producer
+ * index modulo the CQ size. Since we might be making
+ * the CQ bigger, we need to mask our consumer index
+ * using the size of the old CQ buffer before looking
+ * in the new CQ buffer.
+ */
+ if (!mthca_is_memfree(dev))
+ cq->cons_index &= cq->ibcq.cqe;
+
+ if (cqe_sw(get_cqe_from_buf(&cq->resize_buf->buf,
+ cq->cons_index & cq->resize_buf->cqe))) {
+ struct mthca_cq_buf tbuf;
+ int tcqe;
+
+ tbuf = cq->buf;
+ tcqe = cq->ibcq.cqe;
+ cq->buf = cq->resize_buf->buf;
+ cq->ibcq.cqe = cq->resize_buf->cqe;
+
+ cq->resize_buf->buf = tbuf;
+ cq->resize_buf->cqe = tcqe;
+ cq->resize_buf->state = CQ_RESIZE_SWAPPED;
+
+ goto repoll;
+ }
+ }
+
spin_unlock_irqrestore(&cq->lock, flags);
return err == 0 || err == -EAGAIN ? npolled : err;
@@ -684,24 +770,14 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
return 0;
}
-static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
-{
- mthca_buf_free(dev, (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
- &cq->queue, cq->is_direct, &cq->mr);
-}
-
int mthca_init_cq(struct mthca_dev *dev, int nent,
struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq)
{
- int size = nent * MTHCA_CQ_ENTRY_SIZE;
struct mthca_mailbox *mailbox;
struct mthca_cq_context *cq_context;
int err = -ENOMEM;
u8 status;
- int i;
-
- might_sleep();
cq->ibcq.cqe = nent - 1;
cq->is_kernel = !ctx;
@@ -739,14 +815,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
cq_context = mailbox->buf;
if (cq->is_kernel) {
- err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_CQ_SIZE,
- &cq->queue, &cq->is_direct,
- &dev->driver_pd, 1, &cq->mr);
+ err = mthca_alloc_cq_buf(dev, &cq->buf, nent);
if (err)
goto err_out_mailbox;
-
- for (i = 0; i < nent; ++i)
- set_cqe_hw(get_cqe(cq, i));
}
spin_lock_init(&cq->lock);
@@ -765,7 +836,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
cq_context->pd = cpu_to_be32(pdn);
- cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey);
+ cq_context->lkey = cpu_to_be32(cq->buf.mr.ibmr.lkey);
cq_context->cqn = cpu_to_be32(cq->cqn);
if (mthca_is_memfree(dev)) {
@@ -803,7 +874,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
err_out_free_mr:
if (cq->is_kernel)
- mthca_free_cq_buf(dev, cq);
+ mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_out_mailbox:
mthca_free_mailbox(dev, mailbox);
@@ -832,8 +903,6 @@ void mthca_free_cq(struct mthca_dev *dev,
int err;
u8 status;
- might_sleep();
-
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
mthca_warn(dev, "No memory for mailbox to free CQ.\n");
@@ -871,7 +940,7 @@ void mthca_free_cq(struct mthca_dev *dev,
wait_event(cq->wait, !atomic_read(&cq->refcount));
if (cq->is_kernel) {
- mthca_free_cq_buf(dev, cq);
+ mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
if (mthca_is_memfree(dev)) {
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index e481037288d6..ad52edbefe98 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
@@ -53,8 +53,8 @@
#define DRV_NAME "ib_mthca"
#define PFX DRV_NAME ": "
-#define DRV_VERSION "0.07"
-#define DRV_RELDATE "February 13, 2006"
+#define DRV_VERSION "0.08"
+#define DRV_RELDATE "February 14, 2006"
enum {
MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
@@ -64,7 +64,8 @@ enum {
MTHCA_FLAG_NO_LAM = 1 << 5,
MTHCA_FLAG_FMR = 1 << 6,
MTHCA_FLAG_MEMFREE = 1 << 7,
- MTHCA_FLAG_PCIE = 1 << 8
+ MTHCA_FLAG_PCIE = 1 << 8,
+ MTHCA_FLAG_SINAI_OPT = 1 << 9
};
enum {
@@ -110,9 +111,17 @@ enum {
MTHCA_OPCODE_INVALID = 0xff
};
+enum {
+ MTHCA_CMD_USE_EVENTS = 1 << 0,
+ MTHCA_CMD_POST_DOORBELLS = 1 << 1
+};
+
+enum {
+ MTHCA_CMD_NUM_DBELL_DWORDS = 8
+};
+
struct mthca_cmd {
struct pci_pool *pool;
- int use_events;
struct mutex hcr_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
@@ -121,6 +130,9 @@ struct mthca_cmd {
int free_head;
struct mthca_cmd_context *context;
u16 token_mask;
+ u32 flags;
+ void __iomem *dbell_map;
+ u16 dbell_offsets[MTHCA_CMD_NUM_DBELL_DWORDS];
};
struct mthca_limits {
@@ -470,12 +482,16 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
enum ib_event_type event_type);
void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
struct mthca_srq *srq);
+void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
+int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent);
+void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
struct ib_srq_attr *attr, struct mthca_srq *srq);
void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask);
+int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
enum ib_event_type event_type);
void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
@@ -486,6 +502,8 @@ int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
enum ib_event_type event_type);
+int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
@@ -495,8 +513,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
- int index, int *dbd, __be32 *new_wqe);
+void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
+ int index, int *dbd, __be32 *new_wqe);
int mthca_alloc_qp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
@@ -522,6 +540,7 @@ int mthca_create_ah(struct mthca_dev *dev,
int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
struct ib_ud_header *header);
+int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
int mthca_ah_grh_present(struct mthca_ah *ah);
int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 2eabb27804cd..cbdc348fb689 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -497,7 +497,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
eq->dev = dev;
eq->nent = roundup_pow_of_two(max(nent, 2));
- npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
+ npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
eq->page_list = kmalloc(npages * sizeof *eq->page_list,
GFP_KERNEL);
@@ -825,7 +825,7 @@ void __devexit mthca_unmap_eq_icm(struct mthca_dev *dev)
{
u8 status;
- mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status);
+ mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1, &status);
pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
__free_page(dev->eq_table.icm_page);
@@ -928,7 +928,7 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev)
mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
- for (i = 0; i < MTHCA_EQ_CMD; ++i)
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
if (mthca_is_memfree(dev))
arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
else
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 1229c604c6e0..4ace6a392f41 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -109,6 +109,19 @@ static void smp_snoop(struct ib_device *ibdev,
}
}
+static void node_desc_override(struct ib_device *dev,
+ struct ib_mad *mad)
+{
+ if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
+ mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
+ mutex_lock(&to_mdev(dev)->cap_mask_mutex);
+ memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
+ mutex_unlock(&to_mdev(dev)->cap_mask_mutex);
+ }
+}
+
static void forward_trap(struct mthca_dev *dev,
u8 port_num,
struct ib_mad *mad)
@@ -207,8 +220,10 @@ int mthca_process_mad(struct ib_device *ibdev,
return IB_MAD_RESULT_FAILURE;
}
- if (!out_mad->mad_hdr.status)
+ if (!out_mad->mad_hdr.status) {
smp_snoop(ibdev, port_num, in_mad);
+ node_desc_override(ibdev, out_mad);
+ }
/* set return bit in status of directed route responses */
if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 9c849d27b06e..266f347c6707 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -935,13 +935,19 @@ enum {
static struct {
u64 latest_fw;
- int is_memfree;
- int is_pcie;
+ u32 flags;
} mthca_hca_table[] = {
- [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 },
- [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 },
- [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 },
- [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 }
+ [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 4, 0),
+ .flags = 0 },
+ [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 400),
+ .flags = MTHCA_FLAG_PCIE },
+ [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0),
+ .flags = MTHCA_FLAG_MEMFREE |
+ MTHCA_FLAG_PCIE },
+ [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 800),
+ .flags = MTHCA_FLAG_MEMFREE |
+ MTHCA_FLAG_PCIE |
+ MTHCA_FLAG_SINAI_OPT }
};
static int __devinit mthca_init_one(struct pci_dev *pdev,
@@ -1031,12 +1037,9 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
mdev->pdev = pdev;
+ mdev->mthca_flags = mthca_hca_table[id->driver_data].flags;
if (ddr_hidden)
mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
- if (mthca_hca_table[id->driver_data].is_memfree)
- mdev->mthca_flags |= MTHCA_FLAG_MEMFREE;
- if (mthca_hca_table[id->driver_data].is_pcie)
- mdev->mthca_flags |= MTHCA_FLAG_PCIE;
/*
* Now reset the HCA before we touch the PCI capabilities or
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index 321f11e707f2..9965bda9afed 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -187,7 +187,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) {
- mthca_dbg(dev, "QP %06x already a member of MGM\n",
+ mthca_dbg(dev, "QP %06x already a member of MGM\n",
ibqp->qp_num);
err = 0;
goto out;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index d709cb162a72..15cc2f6eb475 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -202,7 +202,8 @@ void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int o
if (--table->icm[i]->refcount == 0) {
mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
- MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
+ &status);
mthca_free_icm(dev, table->icm[i]);
table->icm[i] = NULL;
}
@@ -336,7 +337,8 @@ err:
for (i = 0; i < num_icm; ++i)
if (table->icm[i]) {
mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
- MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
+ &status);
mthca_free_icm(dev, table->icm[i]);
}
@@ -353,7 +355,8 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
for (i = 0; i < table->num_icm; ++i)
if (table->icm[i]) {
mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
- MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
+ &status);
mthca_free_icm(dev, table->icm[i]);
}
@@ -364,7 +367,7 @@ static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int pag
{
return dev->uar_table.uarc_base +
uar->index * dev->uar_table.uarc_size +
- page * 4096;
+ page * MTHCA_ICM_PAGE_SIZE;
}
int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
@@ -401,7 +404,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
if (ret < 0)
goto out;
- db_tab->page[i].mem.length = 4096;
+ db_tab->page[i].mem.length = MTHCA_ICM_PAGE_SIZE;
db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;
ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
@@ -455,7 +458,7 @@ struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
if (!mthca_is_memfree(dev))
return NULL;
- npages = dev->uar_table.uarc_size / 4096;
+ npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
if (!db_tab)
return ERR_PTR(-ENOMEM);
@@ -478,7 +481,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
if (!mthca_is_memfree(dev))
return;
- for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) {
+ for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {
if (db_tab->page[i].uvirt) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
@@ -551,20 +554,20 @@ int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
page = dev->db_tab->page + end;
alloc:
- page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096,
+ page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
&page->mapping, GFP_KERNEL);
if (!page->db_rec) {
ret = -ENOMEM;
goto out;
}
- memset(page->db_rec, 0, 4096);
+ memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);
ret = mthca_MAP_ICM_page(dev, page->mapping,
mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
if (!ret && status)
ret = -EINVAL;
if (ret) {
- dma_free_coherent(&dev->pdev->dev, 4096,
+ dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
page->db_rec, page->mapping);
goto out;
}
@@ -612,7 +615,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
i >= dev->db_tab->max_group1 - 1) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
- dma_free_coherent(&dev->pdev->dev, 4096,
+ dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
page->db_rec, page->mapping);
page->db_rec = NULL;
@@ -640,7 +643,7 @@ int mthca_init_db_tab(struct mthca_dev *dev)
mutex_init(&dev->db_tab->mutex);
- dev->db_tab->npages = dev->uar_table.uarc_size / 4096;
+ dev->db_tab->npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
dev->db_tab->max_group1 = 0;
dev->db_tab->min_group2 = dev->db_tab->npages - 1;
@@ -681,7 +684,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev)
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
- dma_free_coherent(&dev->pdev->dev, 4096,
+ dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
dev->db_tab->page[i].db_rec,
dev->db_tab->page[i].mapping);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index 36f1141a08aa..6d42947e1dc4 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -45,6 +45,12 @@
((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
(sizeof (struct scatterlist)))
+enum {
+ MTHCA_ICM_PAGE_SHIFT = 12,
+ MTHCA_ICM_PAGE_SIZE = 1 << MTHCA_ICM_PAGE_SHIFT,
+ MTHCA_DB_REC_PER_PAGE = MTHCA_ICM_PAGE_SIZE / 8
+};
+
struct mthca_icm_chunk {
struct list_head list;
int npages;
@@ -131,10 +137,6 @@ static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter)
return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
}
-enum {
- MTHCA_DB_REC_PER_PAGE = 4096 / 8
-};
-
struct mthca_db_page {
DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE);
__be64 *db_rec;
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index e995e2aa016d..698b62125765 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -76,6 +76,8 @@ struct mthca_mpt_entry {
#define MTHCA_MPT_STATUS_SW 0xF0
#define MTHCA_MPT_STATUS_HW 0x00
+#define SINAI_FMR_KEY_INC 0x1000000
+
/*
* Buddy allocator for MTT segments (currently not very efficient
* since it doesn't keep a free list and just searches linearly
@@ -330,6 +332,14 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
return tavor_key_to_hw_index(key);
}
+static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
+{
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ return ((key << 20) & 0x800000) | (key & 0x7fffff);
+ else
+ return key;
+}
+
int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
{
@@ -340,13 +350,12 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
int err;
u8 status;
- might_sleep();
-
WARN_ON(buffer_size_shift >= 32);
key = mthca_alloc(&dev->mr_table.mpt_alloc);
if (key == -1)
return -ENOMEM;
+ key = adjust_key(dev, key);
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
if (mthca_is_memfree(dev)) {
@@ -467,8 +476,6 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
int err;
u8 status;
- might_sleep();
-
err = mthca_HW2SW_MPT(dev, NULL,
key_to_hw_index(dev, mr->ibmr.lkey) &
(dev->limits.num_mpts - 1),
@@ -495,9 +502,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
int err = -ENOMEM;
int i;
- might_sleep();
-
- if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
+ if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32)
return -EINVAL;
/* For Arbel, all MTTs must fit in the same page. */
@@ -510,6 +515,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
key = mthca_alloc(&dev->mr_table.mpt_alloc);
if (key == -1)
return -ENOMEM;
+ key = adjust_key(dev, key);
idx = key & (dev->limits.num_mpts - 1);
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
@@ -523,7 +529,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
BUG_ON(!mr->mem.arbel.mpt);
} else
mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
- sizeof *(mr->mem.tavor.mpt) * idx;
+ sizeof *(mr->mem.tavor.mpt) * idx;
mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
if (IS_ERR(mr->mtt))
@@ -549,7 +555,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
MTHCA_MPT_FLAG_REGION |
access);
- mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
+ mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12);
mpt_entry->key = cpu_to_be32(key);
mpt_entry->pd = cpu_to_be32(pd);
memset(&mpt_entry->start, 0,
@@ -617,7 +623,7 @@ static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
if (list_len > fmr->attr.max_pages)
return -EINVAL;
- page_mask = (1 << fmr->attr.page_size) - 1;
+ page_mask = (1 << fmr->attr.page_shift) - 1;
/* We are getting page lists, so va must be page aligned. */
if (iova & page_mask)
@@ -665,7 +671,7 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
}
mpt_entry.lkey = cpu_to_be32(key);
- mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
+ mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
mpt_entry.start = cpu_to_be64(iova);
__raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
@@ -693,7 +699,10 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
++fmr->maps;
key = arbel_key_to_hw_index(fmr->ibmr.lkey);
- key += dev->limits.num_mpts;
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ key += SINAI_FMR_KEY_INC;
+ else
+ key += dev->limits.num_mpts;
fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
@@ -706,7 +715,7 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
fmr->mem.arbel.mpt->key = cpu_to_be32(key);
fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
- fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
+ fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
fmr->mem.arbel.mpt->start = cpu_to_be64(iova);
wmb();
@@ -766,6 +775,9 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
else
dev->mthca_flags |= MTHCA_FLAG_FMR;
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ mthca_dbg(dev, "Memory key throughput optimization activated.\n");
+
err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
fls(dev->limits.num_mtt_segs - 1));
@@ -785,7 +797,7 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
}
dev->mr_table.tavor_fmr.mpt_base =
- ioremap(dev->mr_table.mpt_base,
+ ioremap(dev->mr_table.mpt_base,
(1 << i) * sizeof (struct mthca_mpt_entry));
if (!dev->mr_table.tavor_fmr.mpt_base) {
@@ -813,7 +825,7 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
goto err_reserve_fmr;
dev->mr_table.fmr_mtt_buddy =
- &dev->mr_table.tavor_fmr.mtt_buddy;
+ &dev->mr_table.tavor_fmr.mtt_buddy;
} else
dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
index 3dbf06a6e6f4..105fc5faaddf 100644
--- a/drivers/infiniband/hw/mthca/mthca_pd.c
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -43,8 +43,6 @@ int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
{
int err = 0;
- might_sleep();
-
pd->privileged = privileged;
atomic_set(&pd->sqp_count, 0);
@@ -66,7 +64,6 @@ int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
{
- might_sleep();
if (pd->privileged)
mthca_free_mr(dev, &pd->ntmr);
mthca_free(&dev->pd_table.alloc, pd->pd_num);
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 08a909371b0a..58d44aa3c302 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -152,7 +152,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
}
if (total_size > mem_avail) {
mthca_err(dev, "Profile requires 0x%llx bytes; "
- "won't in 0x%llx bytes of context memory.\n",
+ "won't fit in 0x%llx bytes of context memory.\n",
(unsigned long long) total_size,
(unsigned long long) mem_avail);
kfree(profile);
@@ -262,6 +262,14 @@ u64 mthca_make_profile(struct mthca_dev *dev,
*/
dev->limits.num_pds = MTHCA_NUM_PDS;
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT &&
+ init_hca->log_mpt_sz > 23) {
+ mthca_warn(dev, "MPT table too large (requested size 2^%d >= 2^24)\n",
+ init_hca->log_mpt_sz);
+ mthca_warn(dev, "Disabling memory key throughput optimization.\n");
+ dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT;
+ }
+
/*
* For Tavor, FMRs use ioremapped PCI memory. For 32 bit
* systems it may use too much vmalloc space to map all MTT
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index e88e39aef85a..2c250bc11c33 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
@@ -108,12 +108,12 @@ static int mthca_query_device(struct ib_device *ibdev,
props->max_srq_wr = mdev->limits.max_srq_wqes;
props->max_srq_sge = mdev->limits.max_sg;
props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay;
- props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
+ props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = mdev->limits.pkey_table_len;
props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms;
props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
- props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+ props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
err = 0;
@@ -176,6 +176,23 @@ static int mthca_query_port(struct ib_device *ibdev,
return err;
}
+static int mthca_modify_device(struct ib_device *ibdev,
+ int mask,
+ struct ib_device_modify *props)
+{
+ if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
+ return -ERESTARTSYS;
+ memcpy(ibdev->node_desc, props->node_desc, 64);
+ mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
+ }
+
+ return 0;
+}
+
static int mthca_modify_port(struct ib_device *ibdev,
u8 port, int port_modify_mask,
struct ib_port_modify *props)
@@ -669,9 +686,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
}
if (context) {
- cq->mr.ibmr.lkey = ucmd.lkey;
- cq->set_ci_db_index = ucmd.set_db_index;
- cq->arm_db_index = ucmd.arm_db_index;
+ cq->buf.mr.ibmr.lkey = ucmd.lkey;
+ cq->set_ci_db_index = ucmd.set_db_index;
+ cq->arm_db_index = ucmd.arm_db_index;
}
for (nent = 1; nent <= entries; nent <<= 1)
@@ -689,6 +706,8 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
goto err_free;
}
+ cq->resize_buf = NULL;
+
return &cq->ibcq;
err_free:
@@ -707,6 +726,121 @@ err_unmap_set:
return ERR_PTR(err);
}
+static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
+ int entries)
+{
+ int ret;
+
+ spin_lock_irq(&cq->lock);
+ if (cq->resize_buf) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+ if (!cq->resize_buf) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ cq->resize_buf->state = CQ_RESIZE_ALLOC;
+
+ ret = 0;
+
+unlock:
+ spin_unlock_irq(&cq->lock);
+
+ if (ret)
+ return ret;
+
+ ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
+ if (ret) {
+ spin_lock_irq(&cq->lock);
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ spin_unlock_irq(&cq->lock);
+ return ret;
+ }
+
+ cq->resize_buf->cqe = entries - 1;
+
+ spin_lock_irq(&cq->lock);
+ cq->resize_buf->state = CQ_RESIZE_READY;
+ spin_unlock_irq(&cq->lock);
+
+ return 0;
+}
+
+static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(ibcq->device);
+ struct mthca_cq *cq = to_mcq(ibcq);
+ struct mthca_resize_cq ucmd;
+ u32 lkey;
+ u8 status;
+ int ret;
+
+ if (entries < 1 || entries > dev->limits.max_cqes)
+ return -EINVAL;
+
+ entries = roundup_pow_of_two(entries + 1);
+ if (entries == ibcq->cqe + 1)
+ return 0;
+
+ if (cq->is_kernel) {
+ ret = mthca_alloc_resize_buf(dev, cq, entries);
+ if (ret)
+ return ret;
+ lkey = cq->resize_buf->buf.mr.ibmr.lkey;
+ } else {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return -EFAULT;
+ lkey = ucmd.lkey;
+ }
+
+ ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, long_log2(entries), &status);
+ if (status)
+ ret = -EINVAL;
+
+ if (ret) {
+ if (cq->resize_buf) {
+ mthca_free_cq_buf(dev, &cq->resize_buf->buf,
+ cq->resize_buf->cqe);
+ kfree(cq->resize_buf);
+ spin_lock_irq(&cq->lock);
+ cq->resize_buf = NULL;
+ spin_unlock_irq(&cq->lock);
+ }
+ return ret;
+ }
+
+ if (cq->is_kernel) {
+ struct mthca_cq_buf tbuf;
+ int tcqe;
+
+ spin_lock_irq(&cq->lock);
+ if (cq->resize_buf->state == CQ_RESIZE_READY) {
+ mthca_cq_resize_copy_cqes(cq);
+ tbuf = cq->buf;
+ tcqe = cq->ibcq.cqe;
+ cq->buf = cq->resize_buf->buf;
+ cq->ibcq.cqe = cq->resize_buf->cqe;
+ } else {
+ tbuf = cq->resize_buf->buf;
+ tcqe = cq->resize_buf->cqe;
+ }
+
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ spin_unlock_irq(&cq->lock);
+
+ mthca_free_cq_buf(dev, &tbuf, tcqe);
+ } else
+ ibcq->cqe = entries - 1;
+
+ return 0;
+}
+
static int mthca_destroy_cq(struct ib_cq *cq)
{
if (cq->uobject) {
@@ -1070,6 +1204,20 @@ static int mthca_init_node_data(struct mthca_dev *dev)
goto out;
init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+
+ err = mthca_MAD_IFC(dev, 1, 1,
+ 1, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
+
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mthca_MAD_IFC(dev, 1, 1,
@@ -1113,14 +1261,17 @@ int mthca_register_device(struct mthca_dev *dev)
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
dev->ib_dev.node_type = IB_NODE_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
@@ -1128,6 +1279,7 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.class_dev.dev = &dev->pdev->dev;
dev->ib_dev.query_device = mthca_query_device;
dev->ib_dev.query_port = mthca_query_port;
+ dev->ib_dev.modify_device = mthca_modify_device;
dev->ib_dev.modify_port = mthca_modify_port;
dev->ib_dev.query_pkey = mthca_query_pkey;
dev->ib_dev.query_gid = mthca_query_gid;
@@ -1137,11 +1289,13 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.alloc_pd = mthca_alloc_pd;
dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
dev->ib_dev.create_ah = mthca_ah_create;
+ dev->ib_dev.query_ah = mthca_ah_query;
dev->ib_dev.destroy_ah = mthca_ah_destroy;
if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
dev->ib_dev.create_srq = mthca_create_srq;
- dev->ib_dev.modify_srq = mthca_modify_srq;
+ dev->ib_dev.modify_srq = mthca_modify_srq;
+ dev->ib_dev.query_srq = mthca_query_srq;
dev->ib_dev.destroy_srq = mthca_destroy_srq;
if (mthca_is_memfree(dev))
@@ -1152,8 +1306,10 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.create_qp = mthca_create_qp;
dev->ib_dev.modify_qp = mthca_modify_qp;
+ dev->ib_dev.query_qp = mthca_query_qp;
dev->ib_dev.destroy_qp = mthca_destroy_qp;
dev->ib_dev.create_cq = mthca_create_cq;
+ dev->ib_dev.resize_cq = mthca_resize_cq;
dev->ib_dev.destroy_cq = mthca_destroy_cq;
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 1e73947b4702..2e7f52136965 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -164,9 +164,11 @@ struct mthca_ah {
* - wait_event until ref count is zero
*
* It is the consumer's responsibilty to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
+ * operations (WQE posting or state modification) are pending when a
* QP is destroyed. Also, the consumer must make sure that calls to
- * qp_modify are serialized.
+ * qp_modify are serialized. Similarly, the consumer is responsible
+ * for ensuring that no CQ resize operations are pending when a CQ
+ * is destroyed.
*
* Possible optimizations (wait for profile data to see if/where we
* have locks bouncing between CPUs):
@@ -176,25 +178,40 @@ struct mthca_ah {
* send queue and one for the receive queue)
*/
+struct mthca_cq_buf {
+ union mthca_buf queue;
+ struct mthca_mr mr;
+ int is_direct;
+};
+
+struct mthca_cq_resize {
+ struct mthca_cq_buf buf;
+ int cqe;
+ enum {
+ CQ_RESIZE_ALLOC,
+ CQ_RESIZE_READY,
+ CQ_RESIZE_SWAPPED
+ } state;
+};
+
struct mthca_cq {
- struct ib_cq ibcq;
- spinlock_t lock;
- atomic_t refcount;
- int cqn;
- u32 cons_index;
- int is_direct;
- int is_kernel;
+ struct ib_cq ibcq;
+ spinlock_t lock;
+ atomic_t refcount;
+ int cqn;
+ u32 cons_index;
+ struct mthca_cq_buf buf;
+ struct mthca_cq_resize *resize_buf;
+ int is_kernel;
/* Next fields are Arbel only */
- int set_ci_db_index;
- __be32 *set_ci_db;
- int arm_db_index;
- __be32 *arm_db;
- int arm_sn;
+ int set_ci_db_index;
+ __be32 *set_ci_db;
+ int arm_db_index;
+ __be32 *arm_db;
+ int arm_sn;
- union mthca_buf queue;
- struct mthca_mr mr;
- wait_queue_head_t wait;
+ wait_queue_head_t wait;
};
struct mthca_srq {
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index fba608ed7df2..1bc2678c2fae 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -2,7 +2,7 @@
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -286,207 +286,6 @@ static int to_mthca_st(int transport)
}
}
-static const struct {
- int trans;
- u32 req_param[NUM_TRANS];
- u32 opt_param[NUM_TRANS];
-} state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
- [IB_QPS_RESET] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_INIT] = {
- .trans = MTHCA_TRANS_RST2INIT,
- .req_param = {
- [UD] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_QKEY),
- [UC] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_ACCESS_FLAGS),
- [RC] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_ACCESS_FLAGS),
- [MLX] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- },
- /* bug-for-bug compatibility with VAPI: */
- .opt_param = {
- [MLX] = IB_QP_PORT
- }
- },
- },
- [IB_QPS_INIT] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_INIT] = {
- .trans = MTHCA_TRANS_INIT2INIT,
- .opt_param = {
- [UD] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_QKEY),
- [UC] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_ACCESS_FLAGS),
- [RC] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_ACCESS_FLAGS),
- [MLX] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- }
- },
- [IB_QPS_RTR] = {
- .trans = MTHCA_TRANS_INIT2RTR,
- .req_param = {
- [UC] = (IB_QP_AV |
- IB_QP_PATH_MTU |
- IB_QP_DEST_QPN |
- IB_QP_RQ_PSN),
- [RC] = (IB_QP_AV |
- IB_QP_PATH_MTU |
- IB_QP_DEST_QPN |
- IB_QP_RQ_PSN |
- IB_QP_MAX_DEST_RD_ATOMIC |
- IB_QP_MIN_RNR_TIMER),
- },
- .opt_param = {
- [UD] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- [UC] = (IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_PKEY_INDEX),
- [RC] = (IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_PKEY_INDEX),
- [MLX] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- }
- }
- },
- [IB_QPS_RTR] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_RTS] = {
- .trans = MTHCA_TRANS_RTR2RTS,
- .req_param = {
- [UD] = IB_QP_SQ_PSN,
- [UC] = IB_QP_SQ_PSN,
- [RC] = (IB_QP_TIMEOUT |
- IB_QP_RETRY_CNT |
- IB_QP_RNR_RETRY |
- IB_QP_SQ_PSN |
- IB_QP_MAX_QP_RD_ATOMIC),
- [MLX] = IB_QP_SQ_PSN,
- },
- .opt_param = {
- [UD] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- [UC] = (IB_QP_CUR_STATE |
- IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_PATH_MIG_STATE),
- [RC] = (IB_QP_CUR_STATE |
- IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_MIN_RNR_TIMER |
- IB_QP_PATH_MIG_STATE),
- [MLX] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- }
- }
- },
- [IB_QPS_RTS] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_RTS] = {
- .trans = MTHCA_TRANS_RTS2RTS,
- .opt_param = {
- [UD] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- [UC] = (IB_QP_ACCESS_FLAGS |
- IB_QP_ALT_PATH |
- IB_QP_PATH_MIG_STATE),
- [RC] = (IB_QP_ACCESS_FLAGS |
- IB_QP_ALT_PATH |
- IB_QP_PATH_MIG_STATE |
- IB_QP_MIN_RNR_TIMER),
- [MLX] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- }
- },
- [IB_QPS_SQD] = {
- .trans = MTHCA_TRANS_RTS2SQD,
- },
- },
- [IB_QPS_SQD] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_RTS] = {
- .trans = MTHCA_TRANS_SQD2RTS,
- .opt_param = {
- [UD] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- [UC] = (IB_QP_CUR_STATE |
- IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_PATH_MIG_STATE),
- [RC] = (IB_QP_CUR_STATE |
- IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_MIN_RNR_TIMER |
- IB_QP_PATH_MIG_STATE),
- [MLX] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- }
- },
- [IB_QPS_SQD] = {
- .trans = MTHCA_TRANS_SQD2SQD,
- .opt_param = {
- [UD] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- [UC] = (IB_QP_AV |
- IB_QP_CUR_STATE |
- IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_PKEY_INDEX |
- IB_QP_PATH_MIG_STATE),
- [RC] = (IB_QP_AV |
- IB_QP_TIMEOUT |
- IB_QP_RETRY_CNT |
- IB_QP_RNR_RETRY |
- IB_QP_MAX_QP_RD_ATOMIC |
- IB_QP_MAX_DEST_RD_ATOMIC |
- IB_QP_CUR_STATE |
- IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_PKEY_INDEX |
- IB_QP_MIN_RNR_TIMER |
- IB_QP_PATH_MIG_STATE),
- [MLX] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- }
- }
- },
- [IB_QPS_SQE] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
- [IB_QPS_RTS] = {
- .trans = MTHCA_TRANS_SQERR2RTS,
- .opt_param = {
- [UD] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- [UC] = (IB_QP_CUR_STATE |
- IB_QP_ACCESS_FLAGS),
- [MLX] = (IB_QP_CUR_STATE |
- IB_QP_QKEY),
- }
- }
- },
- [IB_QPS_ERR] = {
- [IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
- [IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }
- }
-};
-
static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
int attr_mask)
{
@@ -549,23 +348,167 @@ static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
return cpu_to_be32(hw_access_flags);
}
-static void mthca_path_set(struct ib_ah_attr *ah, struct mthca_qp_path *path)
+static inline enum ib_qp_state to_ib_qp_state(int mthca_state)
+{
+ switch (mthca_state) {
+ case MTHCA_QP_STATE_RST: return IB_QPS_RESET;
+ case MTHCA_QP_STATE_INIT: return IB_QPS_INIT;
+ case MTHCA_QP_STATE_RTR: return IB_QPS_RTR;
+ case MTHCA_QP_STATE_RTS: return IB_QPS_RTS;
+ case MTHCA_QP_STATE_DRAINING:
+ case MTHCA_QP_STATE_SQD: return IB_QPS_SQD;
+ case MTHCA_QP_STATE_SQE: return IB_QPS_SQE;
+ case MTHCA_QP_STATE_ERR: return IB_QPS_ERR;
+ default: return -1;
+ }
+}
+
+static inline enum ib_mig_state to_ib_mig_state(int mthca_mig_state)
+{
+ switch (mthca_mig_state) {
+ case 0: return IB_MIG_ARMED;
+ case 1: return IB_MIG_REARM;
+ case 3: return IB_MIG_MIGRATED;
+ default: return -1;
+ }
+}
+
+static int to_ib_qp_access_flags(int mthca_flags)
+{
+ int ib_flags = 0;
+
+ if (mthca_flags & MTHCA_QP_BIT_RRE)
+ ib_flags |= IB_ACCESS_REMOTE_READ;
+ if (mthca_flags & MTHCA_QP_BIT_RWE)
+ ib_flags |= IB_ACCESS_REMOTE_WRITE;
+ if (mthca_flags & MTHCA_QP_BIT_RAE)
+ ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return ib_flags;
+}
+
+static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
+ struct mthca_qp_path *path)
+{
+ memset(ib_ah_attr, 0, sizeof *path);
+ ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
+ ib_ah_attr->dlid = be16_to_cpu(path->rlid);
+ ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
+ ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
+ ib_ah_attr->static_rate = path->static_rate & 0x7;
+ ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
+ if (ib_ah_attr->ah_flags) {
+ ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
+ ib_ah_attr->grh.hop_limit = path->hop_limit;
+ ib_ah_attr->grh.traffic_class =
+ (be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff;
+ ib_ah_attr->grh.flow_label =
+ be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff;
+ memcpy(ib_ah_attr->grh.dgid.raw,
+ path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+ }
+}
+
+int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ int err;
+ struct mthca_mailbox *mailbox;
+ struct mthca_qp_param *qp_param;
+ struct mthca_qp_context *context;
+ int mthca_state;
+ u8 status;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_warn(dev, "QUERY_QP returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ qp_param = mailbox->buf;
+ context = &qp_param->context;
+ mthca_state = be32_to_cpu(context->flags) >> 28;
+
+ qp_attr->qp_state = to_ib_qp_state(mthca_state);
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->path_mtu = context->mtu_msgmax >> 5;
+ qp_attr->path_mig_state =
+ to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
+ qp_attr->qkey = be32_to_cpu(context->qkey);
+ qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
+ qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
+ qp_attr->dest_qp_num = be32_to_cpu(context->remote_qpn) & 0xffffff;
+ qp_attr->qp_access_flags =
+ to_ib_qp_access_flags(be32_to_cpu(context->params2));
+ qp_attr->cap.max_send_wr = qp->sq.max;
+ qp_attr->cap.max_recv_wr = qp->rq.max;
+ qp_attr->cap.max_send_sge = qp->sq.max_gs;
+ qp_attr->cap.max_recv_sge = qp->rq.max_gs;
+ qp_attr->cap.max_inline_data = qp->max_inline_data;
+
+ to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+ to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+
+ qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
+ qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+
+ /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+ qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;
+
+ qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
+
+ qp_attr->max_dest_rd_atomic =
+ 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
+ qp_attr->min_rnr_timer =
+ (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
+ qp_attr->port_num = qp_attr->ah_attr.port_num;
+ qp_attr->timeout = context->pri_path.ackto >> 3;
+ qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
+ qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5;
+ qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ qp_attr->alt_timeout = context->alt_path.ackto >> 3;
+ qp_init_attr->cap = qp_attr->cap;
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
+ struct mthca_qp_path *path)
{
path->g_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
path->static_rate = !!ah->static_rate;
if (ah->ah_flags & IB_AH_GRH) {
+ if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
+ mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n",
+ ah->grh.sgid_index, dev->limits.gid_table_len-1);
+ return -1;
+ }
+
path->g_mylmc |= 1 << 7;
path->mgid_index = ah->grh.sgid_index;
path->hop_limit = ah->grh.hop_limit;
- path->sl_tclass_flowlabel =
+ path->sl_tclass_flowlabel =
cpu_to_be32((ah->sl << 28) |
- (ah->grh.traffic_class << 20) |
+ (ah->grh.traffic_class << 20) |
(ah->grh.flow_label));
memcpy(path->rgid, ah->grh.dgid.raw, 16);
} else
path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);
+
+ return 0;
}
int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
@@ -576,18 +519,12 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
struct mthca_mailbox *mailbox;
struct mthca_qp_param *qp_param;
struct mthca_qp_context *qp_context;
- u32 req_param, opt_param;
+ u32 sqd_event = 0;
u8 status;
int err;
if (attr_mask & IB_QP_CUR_STATE) {
- if (attr->cur_qp_state != IB_QPS_RTR &&
- attr->cur_qp_state != IB_QPS_RTS &&
- attr->cur_qp_state != IB_QPS_SQD &&
- attr->cur_qp_state != IB_QPS_SQE)
- return -EINVAL;
- else
- cur_state = attr->cur_qp_state;
+ cur_state = attr->cur_qp_state;
} else {
spin_lock_irq(&qp->sq.lock);
spin_lock(&qp->rq.lock);
@@ -596,44 +533,20 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
spin_unlock_irq(&qp->sq.lock);
}
- if (attr_mask & IB_QP_STATE) {
- if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
- return -EINVAL;
- new_state = attr->qp_state;
- } else
- new_state = cur_state;
-
- if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
- mthca_dbg(dev, "Illegal QP transition "
- "%d->%d\n", cur_state, new_state);
- return -EINVAL;
- }
-
- req_param = state_table[cur_state][new_state].req_param[qp->transport];
- opt_param = state_table[cur_state][new_state].opt_param[qp->transport];
-
- if ((req_param & attr_mask) != req_param) {
- mthca_dbg(dev, "QP transition "
- "%d->%d missing req attr 0x%08x\n",
- cur_state, new_state,
- req_param & ~attr_mask);
- return -EINVAL;
- }
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) {
- mthca_dbg(dev, "QP transition (transport %d) "
- "%d->%d has extra attr 0x%08x\n",
- qp->transport,
- cur_state, new_state,
- attr_mask & ~(req_param | opt_param |
- IB_QP_STATE));
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+ mthca_dbg(dev, "Bad QP transition (transport %d) "
+ "%d->%d with attr 0x%08x\n",
+ qp->transport, cur_state, new_state,
+ attr_mask);
return -EINVAL;
}
- if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
attr->pkey_index >= dev->limits.pkey_table_len) {
- mthca_dbg(dev, "PKey index (%u) too large. max is %d\n",
- attr->pkey_index,dev->limits.pkey_table_len-1);
+ mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
+ attr->pkey_index, dev->limits.pkey_table_len-1);
return -EINVAL;
}
@@ -688,8 +601,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
if (qp->transport == MLX || qp->transport == UD)
qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
- else if (attr_mask & IB_QP_PATH_MTU)
+ else if (attr_mask & IB_QP_PATH_MTU) {
+ if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_2048) {
+ mthca_dbg(dev, "path MTU (%u) is invalid\n",
+ attr->path_mtu);
+ return -EINVAL;
+ }
qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
+ }
if (mthca_is_memfree(dev)) {
if (qp->rq.max)
@@ -733,12 +652,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
if (attr_mask & IB_QP_RNR_RETRY) {
qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry =
attr->rnr_retry << 5;
- qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY |
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY |
MTHCA_QP_OPTPAR_ALT_RNR_RETRY);
}
if (attr_mask & IB_QP_AV) {
- mthca_path_set(&attr->ah_attr, &qp_context->pri_path);
+ if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path))
+ return -EINVAL;
+
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
}
@@ -748,14 +669,22 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
}
if (attr_mask & IB_QP_ALT_PATH) {
+ if (attr->alt_pkey_index >= dev->limits.pkey_table_len) {
+ mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n",
+ attr->alt_pkey_index, dev->limits.pkey_table_len-1);
+ return -EINVAL;
+ }
+
if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
- mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
+ mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
attr->alt_port_num);
return -EINVAL;
}
- mthca_path_set(&attr->alt_ah_attr, &qp_context->alt_path);
- qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
+ if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path))
+ return -EINVAL;
+
+ qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
attr->alt_port_num << 24);
qp_context->alt_path.ackto = attr->alt_timeout << 3;
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH);
@@ -841,23 +770,27 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
qp_context->srqn = cpu_to_be32(1 << 24 |
to_msrq(ibqp->srq)->srqn);
- err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
- qp->qpn, 0, mailbox, 0, &status);
+ if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
+ attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY &&
+ attr->en_sqd_async_notify)
+ sqd_event = 1 << 31;
+
+ err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
+ mailbox, sqd_event, &status);
+ if (err)
+ goto out;
if (status) {
- mthca_warn(dev, "modify QP %d returned status %02x.\n",
- state_table[cur_state][new_state].trans, status);
+ mthca_warn(dev, "modify QP %d->%d returned status %02x.\n",
+ cur_state, new_state, status);
err = -EINVAL;
+ goto out;
}
- if (!err) {
- qp->state = new_state;
- if (attr_mask & IB_QP_ACCESS_FLAGS)
- qp->atomic_rd_en = attr->qp_access_flags;
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- qp->resp_depth = attr->max_dest_rd_atomic;
- }
-
- mthca_free_mailbox(dev, mailbox);
+ qp->state = new_state;
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ qp->atomic_rd_en = attr->qp_access_flags;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->resp_depth = attr->max_dest_rd_atomic;
if (is_sqp(dev, qp))
store_attrs(to_msqp(qp), attr, attr_mask);
@@ -882,7 +815,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
- if (!err && new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
+ if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
@@ -901,6 +834,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
}
}
+out:
+ mthca_free_mailbox(dev, mailbox);
return err;
}
@@ -1078,10 +1013,10 @@ static int mthca_map_memfree(struct mthca_dev *dev,
if (ret)
goto err_qpc;
- ret = mthca_table_get(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
- if (ret)
- goto err_eqpc;
+ ret = mthca_table_get(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+ if (ret)
+ goto err_eqpc;
}
@@ -1262,10 +1197,6 @@ int mthca_alloc_qp(struct mthca_dev *dev,
{
int err;
- err = mthca_set_qp_size(dev, cap, pd, qp);
- if (err)
- return err;
-
switch (type) {
case IB_QPT_RC: qp->transport = RC; break;
case IB_QPT_UC: qp->transport = UC; break;
@@ -1273,6 +1204,10 @@ int mthca_alloc_qp(struct mthca_dev *dev,
default: return -EINVAL;
}
+ err = mthca_set_qp_size(dev, cap, pd, qp);
+ if (err)
+ return err;
+
qp->qpn = mthca_alloc(&dev->qp_table.alloc);
if (qp->qpn == -1)
return -ENOMEM;
@@ -1305,6 +1240,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
int err;
+ sqp->qp.transport = MLX;
err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
if (err)
return err;
@@ -1393,7 +1329,8 @@ void mthca_free_qp(struct mthca_dev *dev,
wait_event(qp->wait, !atomic_read(&qp->refcount));
if (qp->state != IB_QPS_RESET)
- mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
+ mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
+ NULL, 0, &status);
/*
* If this is a userspace QP, the buffers, MR, CQs and so on
@@ -1699,7 +1636,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
mthca_opcode[wr->opcode]);
wmb();
((struct mthca_next_seg *) prev_wqe)->ee_nds =
- cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size);
+ cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
+ ((wr->send_flags & IB_SEND_FENCE) ?
+ MTHCA_NEXT_FENCE : 0));
if (!size0) {
size0 = size;
@@ -2061,7 +2000,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
mthca_opcode[wr->opcode]);
wmb();
((struct mthca_next_seg *) prev_wqe)->ee_nds =
- cpu_to_be32(MTHCA_NEXT_DBD | size);
+ cpu_to_be32(MTHCA_NEXT_DBD | size |
+ ((wr->send_flags & IB_SEND_FENCE) ?
+ MTHCA_NEXT_FENCE : 0));
if (!size0) {
size0 = size;
@@ -2115,7 +2056,7 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int i;
void *wqe;
- spin_lock_irqsave(&qp->rq.lock, flags);
+ spin_lock_irqsave(&qp->rq.lock, flags);
/* XXX check that state is OK to post receive */
@@ -2182,8 +2123,8 @@ out:
return err;
}
-int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
- int index, int *dbd, __be32 *new_wqe)
+void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
+ int index, int *dbd, __be32 *new_wqe)
{
struct mthca_next_seg *next;
@@ -2193,7 +2134,7 @@ int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
*/
if (qp->ibqp.srq) {
*new_wqe = 0;
- return 0;
+ return;
}
if (is_send)
@@ -2207,8 +2148,6 @@ int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
(next->ee_nds & cpu_to_be32(0x3f));
else
*new_wqe = 0;
-
- return 0;
}
int __devinit mthca_init_qp_table(struct mthca_dev *dev)
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index e7e153d9c4c6..0cfd15802217 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -49,7 +49,8 @@ struct mthca_tavor_srq_context {
__be32 state_pd;
__be32 lkey;
__be32 uar;
- __be32 wqe_cnt;
+ __be16 limit_watermark;
+ __be16 wqe_cnt;
u32 reserved[2];
};
@@ -204,6 +205,10 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
ds = max(64UL,
roundup_pow_of_two(sizeof (struct mthca_next_seg) +
srq->max_gs * sizeof (struct mthca_data_seg)));
+
+ if (ds > dev->limits.max_desc_sz)
+ return -EINVAL;
+
srq->wqe_shift = long_log2(ds);
srq->srqn = mthca_alloc(&dev->srq_table.alloc);
@@ -271,6 +276,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
srq->first_free = 0;
srq->last_free = srq->max - 1;
+ attr->max_wr = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max;
+ attr->max_sge = srq->max_gs;
+
return 0;
err_out_free_srq:
@@ -339,7 +347,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask)
-{
+{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
int ret;
@@ -350,6 +358,8 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return -EINVAL;
if (attr_mask & IB_SRQ_LIMIT) {
+ if (attr->srq_limit > srq->max)
+ return -EINVAL;
ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
if (ret)
return ret;
@@ -360,6 +370,41 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return 0;
}
+int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+ struct mthca_dev *dev = to_mdev(ibsrq->device);
+ struct mthca_srq *srq = to_msrq(ibsrq);
+ struct mthca_mailbox *mailbox;
+ struct mthca_arbel_srq_context *arbel_ctx;
+ struct mthca_tavor_srq_context *tavor_ctx;
+ u8 status;
+ int err;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox, &status);
+ if (err)
+ goto out;
+
+ if (mthca_is_memfree(dev)) {
+ arbel_ctx = mailbox->buf;
+ srq_attr->srq_limit = be16_to_cpu(arbel_ctx->limit_watermark);
+ } else {
+ tavor_ctx = mailbox->buf;
+ srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark);
+ }
+
+ srq_attr->max_wr = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max;
+ srq_attr->max_sge = srq->max_gs;
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+
+ return err;
+}
+
void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
enum ib_event_type event_type)
{
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index bb015c6494c4..02cc0a766f3a 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -75,6 +75,11 @@ struct mthca_create_cq_resp {
__u32 reserved;
};
+struct mthca_resize_cq {
+ __u32 lkey;
+ __u32 reserved;
+};
+
struct mthca_create_srq {
__u32 lkey;
__u32 db_index;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 2f85a9a831b1..b640107fb732 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -72,13 +72,14 @@ enum {
IPOIB_MAX_MCAST_QUEUE = 3,
IPOIB_FLAG_OPER_UP = 0,
- IPOIB_FLAG_ADMIN_UP = 1,
- IPOIB_PKEY_ASSIGNED = 2,
- IPOIB_PKEY_STOP = 3,
- IPOIB_FLAG_SUBINTERFACE = 4,
- IPOIB_MCAST_RUN = 5,
- IPOIB_STOP_REAPER = 6,
- IPOIB_MCAST_STARTED = 7,
+ IPOIB_FLAG_INITIALIZED = 1,
+ IPOIB_FLAG_ADMIN_UP = 2,
+ IPOIB_PKEY_ASSIGNED = 3,
+ IPOIB_PKEY_STOP = 4,
+ IPOIB_FLAG_SUBINTERFACE = 5,
+ IPOIB_MCAST_RUN = 6,
+ IPOIB_STOP_REAPER = 7,
+ IPOIB_MCAST_STARTED = 8,
IPOIB_MAX_BACKOFF_SECONDS = 16,
@@ -217,10 +218,16 @@ struct ipoib_neigh {
struct list_head list;
};
+/*
+ * We stash a pointer to our private neighbour information after our
+ * hardware address in neigh->ha. The ALIGN() expression here makes
+ * sure that this pointer is stored aligned so that an unaligned
+ * load is not needed to dereference it.
+ */
static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
{
- return (struct ipoib_neigh **) (neigh->ha + 24 -
- (offsetof(struct neighbour, ha) & 4));
+ return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) +
+ INFINIBAND_ALEN, sizeof(void *));
}
extern struct workqueue_struct *ipoib_workqueue;
@@ -253,7 +260,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_down(struct net_device *dev, int flush);
int ipoib_ib_dev_stop(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 86bcdd72a107..ed65202878d8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -416,25 +416,46 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
+ ipoib_ib_dev_stop(dev);
return -1;
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+ set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+
return 0;
}
+static void ipoib_pkey_dev_check_presence(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ u16 pkey_index = 0;
+
+ if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
+ clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ else
+ set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+}
+
int ipoib_ib_dev_up(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ ipoib_pkey_dev_check_presence(dev);
+
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+ ipoib_dbg(priv, "PKEY is not assigned.\n");
+ return 0;
+ }
+
set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
return ipoib_mcast_start_thread(dev);
}
-int ipoib_ib_dev_down(struct net_device *dev)
+int ipoib_ib_dev_down(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -449,10 +470,11 @@ int ipoib_ib_dev_down(struct net_device *dev)
set_bit(IPOIB_PKEY_STOP, &priv->flags);
cancel_delayed_work(&priv->pkey_task);
mutex_unlock(&pkey_mutex);
- flush_workqueue(ipoib_workqueue);
+ if (flush)
+ flush_workqueue(ipoib_workqueue);
}
- ipoib_mcast_stop_thread(dev, 1);
+ ipoib_mcast_stop_thread(dev, flush);
ipoib_mcast_dev_flush(dev);
ipoib_flush_paths(dev);
@@ -481,6 +503,8 @@ int ipoib_ib_dev_stop(struct net_device *dev)
struct ipoib_tx_buf *tx_req;
int i;
+ clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+
/*
* Move our QP to the error state and then reinitialize in
* when all work requests have completed or have been flushed.
@@ -585,12 +609,19 @@ void ipoib_ib_dev_flush(void *_dev)
struct net_device *dev = (struct net_device *)_dev;
struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv;
- if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
+ ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return;
+ }
+
+ if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
+ ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
+ return;
+ }
ipoib_dbg(priv, "flushing\n");
- ipoib_ib_dev_down(dev);
+ ipoib_ib_dev_down(dev, 0);
/*
* The device could have been brought down between the start and when
@@ -603,7 +634,7 @@ void ipoib_ib_dev_flush(void *_dev)
/* Flush any child interfaces too */
list_for_each_entry(cpriv, &priv->child_intfs, list)
- ipoib_ib_dev_flush(&cpriv->dev);
+ ipoib_ib_dev_flush(cpriv->dev);
mutex_unlock(&priv->vlan_mutex);
}
@@ -630,17 +661,6 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
* change async notification is available.
*/
-static void ipoib_pkey_dev_check_presence(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- u16 pkey_index = 0;
-
- if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
- else
- set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
-}
-
void ipoib_pkey_poll(void *dev_ptr)
{
struct net_device *dev = dev_ptr;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c3b5f79d1168..53a32f65788d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -133,7 +133,13 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
- ipoib_ib_dev_down(dev);
+ /*
+ * Now flush workqueue to make sure a scheduled task doesn't
+ * bring our internal state back up.
+ */
+ flush_workqueue(ipoib_workqueue);
+
+ ipoib_ib_dev_down(dev, 1);
ipoib_ib_dev_stop(dev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
@@ -247,7 +253,6 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
if (neigh->ah)
ipoib_put_ah(neigh->ah);
*to_ipoib_neigh(neigh->neighbour) = NULL;
- neigh->neighbour->ops->destructor = NULL;
kfree(neigh);
}
@@ -513,12 +518,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
} else {
neigh->ah = NULL;
- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- __skb_queue_tail(&neigh->queue, skb);
- } else {
- ++priv->stats.tx_dropped;
- dev_kfree_skb_any(skb);
- }
+ __skb_queue_tail(&neigh->queue, skb);
if (!path->query && path_rec_start(dev, path))
goto err;
@@ -530,7 +530,6 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
err:
*to_ipoib_neigh(skb->dst->neighbour) = NULL;
list_del(&neigh->list);
- neigh->neighbour->ops->destructor = NULL;
kfree(neigh);
++priv->stats.tx_dropped;
@@ -737,6 +736,11 @@ static void ipoib_set_mcast_list(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
+ ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set");
+ return;
+ }
+
queue_work(ipoib_workqueue, &priv->restart_task);
}
@@ -769,21 +773,9 @@ static void ipoib_neigh_destructor(struct neighbour *n)
ipoib_put_ah(ah);
}
-static int ipoib_neigh_setup(struct neighbour *neigh)
-{
- /*
- * Is this kosher? I can't find anybody in the kernel that
- * sets neigh->destructor, so we should be able to set it here
- * without trouble.
- */
- neigh->ops->destructor = ipoib_neigh_destructor;
-
- return 0;
-}
-
static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
{
- parms->neigh_setup = ipoib_neigh_setup;
+ parms->neigh_destructor = ipoib_neigh_destructor;
return 0;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index a2408d7ec598..93c462eaf4fd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -115,7 +115,6 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
if (neigh->ah)
ipoib_put_ah(neigh->ah);
*to_ipoib_neigh(neigh->neighbour) = NULL;
- neigh->neighbour->ops->destructor = NULL;
kfree(neigh);
}
@@ -213,6 +212,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
{
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_ah *ah;
int ret;
mcast->mcmember = *mcmember;
@@ -269,8 +269,8 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
av.static_rate, priv->local_rate,
ib_sa_rate_enum_to_int(mcast->mcmember.rate));
- mcast->ah = ipoib_create_ah(dev, priv->pd, &av);
- if (!mcast->ah) {
+ ah = ipoib_create_ah(dev, priv->pd, &av);
+ if (!ah) {
ipoib_warn(priv, "ib_address_create failed\n");
} else {
ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT
@@ -280,6 +280,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
be16_to_cpu(mcast->mcmember.mlid),
mcast->mcmember.sl);
}
+
+ spin_lock_irq(&priv->lock);
+ mcast->ah = ah;
+ spin_unlock_irq(&priv->lock);
}
/* actually send any queued packets */
@@ -432,9 +436,11 @@ static void ipoib_mcast_join_complete(int status,
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+ mutex_lock(&mcast_mutex);
+
+ spin_lock_irq(&priv->lock);
mcast->query = NULL;
- mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
if (status == -ETIMEDOUT)
queue_work(ipoib_workqueue, &priv->mcast_task);
@@ -443,6 +449,7 @@ static void ipoib_mcast_join_complete(int status,
mcast->backoff * HZ);
} else
complete(&mcast->done);
+ spin_unlock_irq(&priv->lock);
mutex_unlock(&mcast_mutex);
return;
@@ -630,21 +637,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
if (flush)
flush_workqueue(ipoib_workqueue);
+ spin_lock_irq(&priv->lock);
if (priv->broadcast && priv->broadcast->query) {
ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
priv->broadcast->query = NULL;
+ spin_unlock_irq(&priv->lock);
ipoib_dbg_mcast(priv, "waiting for bcast\n");
wait_for_completion(&priv->broadcast->done);
- }
+ } else
+ spin_unlock_irq(&priv->lock);
list_for_each_entry(mcast, &priv->multicast_list, list) {
+ spin_lock_irq(&priv->lock);
if (mcast->query) {
ib_sa_cancel_query(mcast->query_id, mcast->query);
mcast->query = NULL;
+ spin_unlock_irq(&priv->lock);
ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
IPOIB_GID_ARG(mcast->mcmember.mgid));
wait_for_completion(&mcast->done);
- }
+ } else
+ spin_unlock_irq(&priv->lock);
}
return 0;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index faaf10e5fc7b..5f0388027b25 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -251,10 +251,12 @@ void ipoib_event(struct ib_event_handler *handler,
struct ipoib_dev_priv *priv =
container_of(handler, struct ipoib_dev_priv, event_handler);
- if (record->event == IB_EVENT_PORT_ACTIVE ||
+ if (record->event == IB_EVENT_PORT_ERR ||
+ record->event == IB_EVENT_PKEY_CHANGE ||
+ record->event == IB_EVENT_PORT_ACTIVE ||
record->event == IB_EVENT_LID_CHANGE ||
record->event == IB_EVENT_SM_CHANGE) {
- ipoib_dbg(priv, "Port active event\n");
- schedule_work(&priv->flush_task);
+ ipoib_dbg(priv, "Port state change event\n");
+ queue_work(ipoib_workqueue, &priv->flush_task);
}
}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 960dae5c87d1..61924cc30e55 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -503,8 +503,10 @@ err:
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
struct srp_request *req)
{
+ struct scatterlist *scat;
struct srp_cmd *cmd = req->cmd->buf;
- int len;
+ int len, nents, count;
+ int i;
u8 fmt;
if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE)
@@ -517,82 +519,66 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
return -EINVAL;
}
- if (scmnd->use_sg) {
- struct scatterlist *scat = scmnd->request_buffer;
- int n;
- int i;
-
- n = dma_map_sg(target->srp_host->dev->dma_device,
- scat, scmnd->use_sg, scmnd->sc_data_direction);
+ /*
+ * This handling of non-SG commands can be killed when the
+ * SCSI midlayer no longer generates non-SG commands.
+ */
+ if (likely(scmnd->use_sg)) {
+ nents = scmnd->use_sg;
+ scat = scmnd->request_buffer;
+ } else {
+ nents = 1;
+ scat = &req->fake_sg;
+ sg_init_one(scat, scmnd->request_buffer, scmnd->request_bufflen);
+ }
- if (n == 1) {
- struct srp_direct_buf *buf = (void *) cmd->add_data;
+ count = dma_map_sg(target->srp_host->dev->dma_device, scat, nents,
+ scmnd->sc_data_direction);
- fmt = SRP_DATA_DESC_DIRECT;
+ if (count == 1) {
+ struct srp_direct_buf *buf = (void *) cmd->add_data;
- buf->va = cpu_to_be64(sg_dma_address(scat));
- buf->key = cpu_to_be32(target->srp_host->mr->rkey);
- buf->len = cpu_to_be32(sg_dma_len(scat));
+ fmt = SRP_DATA_DESC_DIRECT;
- len = sizeof (struct srp_cmd) +
- sizeof (struct srp_direct_buf);
- } else {
- struct srp_indirect_buf *buf = (void *) cmd->add_data;
- u32 datalen = 0;
+ buf->va = cpu_to_be64(sg_dma_address(scat));
+ buf->key = cpu_to_be32(target->srp_host->mr->rkey);
+ buf->len = cpu_to_be32(sg_dma_len(scat));
- fmt = SRP_DATA_DESC_INDIRECT;
+ len = sizeof (struct srp_cmd) +
+ sizeof (struct srp_direct_buf);
+ } else {
+ struct srp_indirect_buf *buf = (void *) cmd->add_data;
+ u32 datalen = 0;
- if (scmnd->sc_data_direction == DMA_TO_DEVICE)
- cmd->data_out_desc_cnt = n;
- else
- cmd->data_in_desc_cnt = n;
+ fmt = SRP_DATA_DESC_INDIRECT;
- buf->table_desc.va = cpu_to_be64(req->cmd->dma +
- sizeof *cmd +
- sizeof *buf);
- buf->table_desc.key =
+ if (scmnd->sc_data_direction == DMA_TO_DEVICE)
+ cmd->data_out_desc_cnt = count;
+ else
+ cmd->data_in_desc_cnt = count;
+
+ buf->table_desc.va = cpu_to_be64(req->cmd->dma +
+ sizeof *cmd +
+ sizeof *buf);
+ buf->table_desc.key =
+ cpu_to_be32(target->srp_host->mr->rkey);
+ buf->table_desc.len =
+ cpu_to_be32(count * sizeof (struct srp_direct_buf));
+
+ for (i = 0; i < count; ++i) {
+ buf->desc_list[i].va = cpu_to_be64(sg_dma_address(&scat[i]));
+ buf->desc_list[i].key =
cpu_to_be32(target->srp_host->mr->rkey);
- buf->table_desc.len =
- cpu_to_be32(n * sizeof (struct srp_direct_buf));
-
- for (i = 0; i < n; ++i) {
- buf->desc_list[i].va = cpu_to_be64(sg_dma_address(&scat[i]));
- buf->desc_list[i].key =
- cpu_to_be32(target->srp_host->mr->rkey);
- buf->desc_list[i].len = cpu_to_be32(sg_dma_len(&scat[i]));
-
- datalen += sg_dma_len(&scat[i]);
- }
-
- buf->len = cpu_to_be32(datalen);
+ buf->desc_list[i].len = cpu_to_be32(sg_dma_len(&scat[i]));
- len = sizeof (struct srp_cmd) +
- sizeof (struct srp_indirect_buf) +
- n * sizeof (struct srp_direct_buf);
- }
- } else {
- struct srp_direct_buf *buf = (void *) cmd->add_data;
- dma_addr_t dma;
-
- dma = dma_map_single(target->srp_host->dev->dma_device,
- scmnd->request_buffer, scmnd->request_bufflen,
- scmnd->sc_data_direction);
- if (dma_mapping_error(dma)) {
- printk(KERN_WARNING PFX "unable to map %p/%d (dir %d)\n",
- scmnd->request_buffer, (int) scmnd->request_bufflen,
- scmnd->sc_data_direction);
- return -EINVAL;
+ datalen += sg_dma_len(&scat[i]);
}
- pci_unmap_addr_set(req, direct_mapping, dma);
-
- buf->va = cpu_to_be64(dma);
- buf->key = cpu_to_be32(target->srp_host->mr->rkey);
- buf->len = cpu_to_be32(scmnd->request_bufflen);
+ buf->len = cpu_to_be32(datalen);
- fmt = SRP_DATA_DESC_DIRECT;
-
- len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
+ len = sizeof (struct srp_cmd) +
+ sizeof (struct srp_indirect_buf) +
+ count * sizeof (struct srp_direct_buf);
}
if (scmnd->sc_data_direction == DMA_TO_DEVICE)
@@ -600,7 +586,6 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
else
cmd->buf_fmt = fmt;
-
return len;
}
@@ -608,20 +593,28 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
struct srp_target_port *target,
struct srp_request *req)
{
+ struct scatterlist *scat;
+ int nents;
+
if (!scmnd->request_buffer ||
(scmnd->sc_data_direction != DMA_TO_DEVICE &&
scmnd->sc_data_direction != DMA_FROM_DEVICE))
- return;
+ return;
- if (scmnd->use_sg)
- dma_unmap_sg(target->srp_host->dev->dma_device,
- (struct scatterlist *) scmnd->request_buffer,
- scmnd->use_sg, scmnd->sc_data_direction);
- else
- dma_unmap_single(target->srp_host->dev->dma_device,
- pci_unmap_addr(req, direct_mapping),
- scmnd->request_bufflen,
- scmnd->sc_data_direction);
+ /*
+ * This handling of non-SG commands can be killed when the
+ * SCSI midlayer no longer generates non-SG commands.
+ */
+ if (likely(scmnd->use_sg)) {
+ nents = scmnd->use_sg;
+ scat = (struct scatterlist *) scmnd->request_buffer;
+ } else {
+ nents = 1;
+ scat = (struct scatterlist *) scmnd->request_buffer;
+ }
+
+ dma_unmap_sg(target->srp_host->dev->dma_device, scat, nents,
+ scmnd->sc_data_direction);
}
static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
@@ -1237,6 +1230,87 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
return ret;
}
+static ssize_t show_id_ext(struct class_device *cdev, char *buf)
+{
+ struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+ if (target->state == SRP_TARGET_DEAD ||
+ target->state == SRP_TARGET_REMOVED)
+ return -ENODEV;
+
+ return sprintf(buf, "0x%016llx\n",
+ (unsigned long long) be64_to_cpu(target->id_ext));
+}
+
+static ssize_t show_ioc_guid(struct class_device *cdev, char *buf)
+{
+ struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+ if (target->state == SRP_TARGET_DEAD ||
+ target->state == SRP_TARGET_REMOVED)
+ return -ENODEV;
+
+ return sprintf(buf, "0x%016llx\n",
+ (unsigned long long) be64_to_cpu(target->ioc_guid));
+}
+
+static ssize_t show_service_id(struct class_device *cdev, char *buf)
+{
+ struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+ if (target->state == SRP_TARGET_DEAD ||
+ target->state == SRP_TARGET_REMOVED)
+ return -ENODEV;
+
+ return sprintf(buf, "0x%016llx\n",
+ (unsigned long long) be64_to_cpu(target->service_id));
+}
+
+static ssize_t show_pkey(struct class_device *cdev, char *buf)
+{
+ struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+ if (target->state == SRP_TARGET_DEAD ||
+ target->state == SRP_TARGET_REMOVED)
+ return -ENODEV;
+
+ return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
+}
+
+static ssize_t show_dgid(struct class_device *cdev, char *buf)
+{
+ struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+ if (target->state == SRP_TARGET_DEAD ||
+ target->state == SRP_TARGET_REMOVED)
+ return -ENODEV;
+
+ return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(((__be16 *) target->path.dgid.raw)[0]),
+ be16_to_cpu(((__be16 *) target->path.dgid.raw)[1]),
+ be16_to_cpu(((__be16 *) target->path.dgid.raw)[2]),
+ be16_to_cpu(((__be16 *) target->path.dgid.raw)[3]),
+ be16_to_cpu(((__be16 *) target->path.dgid.raw)[4]),
+ be16_to_cpu(((__be16 *) target->path.dgid.raw)[5]),
+ be16_to_cpu(((__be16 *) target->path.dgid.raw)[6]),
+ be16_to_cpu(((__be16 *) target->path.dgid.raw)[7]));
+}
+
+static CLASS_DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
+static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
+static CLASS_DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
+static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
+static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
+
+static struct class_device_attribute *srp_host_attrs[] = {
+ &class_device_attr_id_ext,
+ &class_device_attr_ioc_guid,
+ &class_device_attr_service_id,
+ &class_device_attr_pkey,
+ &class_device_attr_dgid,
+ NULL
+};
+
static struct scsi_host_template srp_template = {
.module = THIS_MODULE,
.name = DRV_NAME,
@@ -1249,7 +1323,8 @@ static struct scsi_host_template srp_template = {
.this_id = -1,
.sg_tablesize = SRP_MAX_INDIRECT,
.cmd_per_lun = SRP_SQ_SIZE,
- .use_clustering = ENABLE_CLUSTERING
+ .use_clustering = ENABLE_CLUSTERING,
+ .shost_attrs = srp_host_attrs
};
static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
@@ -1366,6 +1441,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
strlcpy(dgid, p + i * 2, 3);
target->path.dgid.raw[i] = simple_strtoul(dgid, NULL, 16);
}
+ kfree(p);
break;
case SRP_OPT_PKEY:
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 4e7727df32f1..bd7f7c3115de 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -38,6 +38,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include <linux/mutex.h>
+#include <linux/scatterlist.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
@@ -94,7 +95,11 @@ struct srp_request {
struct scsi_cmnd *scmnd;
struct srp_iu *cmd;
struct srp_iu *tsk_mgmt;
- DECLARE_PCI_UNMAP_ADDR(direct_mapping)
+ /*
+ * Fake scatterlist used when scmnd->use_sg==0. Can be killed
+ * when the SCSI midlayer no longer generates non-SG commands.
+ */
+ struct scatterlist fake_sg;
struct completion done;
short next;
u8 cmd_done;