Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:

 - Various minor code cleanups and fixes for hns, iser, cxgb4, hfi1,
   rxe, erdma, mana_ib

 - Prefetch support for rxe ODP

 - Remove memory window support from hns as new device FW no longer
   supports it

 - Remove qib, it is very old and obsolete now, Cornelis wishes to
   restructure the hfi1/qib shared layer

 - Fix a race in destroying CQs where we can still end up with work
   running because the work is canceled before the driver stops
   triggering it

 - Improve interaction with namespaces:
     * Follow the devlink namespace for newly spawned RDMA devices
     * Create ipoib net devices in the parent IB device's namespace
     * Allow CAP_NET_RAW checks to pass in user namespaces

 - A new flow control scheme for IB MADs to try and avoid queue
   overflows in the network

 - Fix 2G message sizes in bnxt_re

 - Optimize mkey layout for mlx5 DMABUF

 - New "DMA Handle" concept to allow controlling PCI TPH and steering
   tags

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (71 commits)
  RDMA/siw: Change maintainer email address
  RDMA/mana_ib: add support of multiple ports
  RDMA/mlx5: Refactor optional counters steering code
  RDMA/mlx5: Add DMAH support for reg_user_mr/reg_user_dmabuf_mr
  IB: Extend UVERBS_METHOD_REG_MR to get DMAH
  RDMA/mlx5: Add DMAH object support
  RDMA/core: Introduce a DMAH object and its alloc/free APIs
  IB/core: Add UVERBS_METHOD_REG_MR on the MR object
  net/mlx5: Add support for device steering tag
  net/mlx5: Expose IFC bits for TPH
  PCI/TPH: Expose pcie_tph_get_st_table_size()
  RDMA/mlx5: Fix incorrect MKEY masking
  RDMA/mlx5: Fix returned type from _mlx5r_umr_zap_mkey()
  RDMA/mlx5: remove redundant check on err on return expression
  RDMA/mana_ib: add additional port counters
  RDMA/mana_ib: Fix DSCP value in modify QP
  RDMA/efa: Add CQ with external memory support
  RDMA/core: Add umem "is_contiguous" and "start_dma_addr" helpers
  RDMA/uverbs: Add a common way to create CQ with umem
  RDMA/mlx5: Optimize DMABUF mkey page size
  ...
This commit is contained in:
Linus Torvalds
2025-07-31 12:19:55 -07:00
153 changed files with 2867 additions and 49165 deletions

View File

@@ -33,6 +33,7 @@ ib_umad-y := user_mad.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
uverbs_std_types_cq.o \
uverbs_std_types_dmah.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o uverbs_std_types_counters.o \
uverbs_uapi.o uverbs_std_types_device.o \

View File

@@ -161,6 +161,7 @@ struct cm_counter_attribute {
struct cm_port {
struct cm_device *cm_dev;
struct ib_mad_agent *mad_agent;
struct ib_mad_agent *rep_agent;
u32 port_num;
atomic_long_t counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT];
};
@@ -274,7 +275,8 @@ static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
complete(&cm_id_priv->comp);
}
static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
static struct ib_mad_send_buf *
cm_alloc_msg_agent(struct cm_id_private *cm_id_priv, bool rep_agent)
{
struct ib_mad_agent *mad_agent;
struct ib_mad_send_buf *m;
@@ -286,7 +288,8 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
return ERR_PTR(-EINVAL);
read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
mad_agent = cm_id_priv->av.port->mad_agent;
mad_agent = rep_agent ? cm_id_priv->av.port->rep_agent :
cm_id_priv->av.port->mad_agent;
if (!mad_agent) {
m = ERR_PTR(-EINVAL);
goto out;
@@ -315,6 +318,11 @@ out:
return m;
}
/* Allocate a MAD send buffer on the port's primary (request) MAD agent. */
static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
{
	return cm_alloc_msg_agent(cm_id_priv, false);
}
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
if (msg->ah)
@@ -323,13 +331,14 @@ static void cm_free_msg(struct ib_mad_send_buf *msg)
}
static struct ib_mad_send_buf *
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
cm_alloc_priv_msg_rep(struct cm_id_private *cm_id_priv, enum ib_cm_state state,
bool rep_agent)
{
struct ib_mad_send_buf *msg;
lockdep_assert_held(&cm_id_priv->lock);
msg = cm_alloc_msg(cm_id_priv);
msg = cm_alloc_msg_agent(cm_id_priv, rep_agent);
if (IS_ERR(msg))
return msg;
@@ -344,6 +353,12 @@ cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
return msg;
}
/*
 * Allocate a private MAD send buffer on the port's primary (request)
 * agent; callers sending REPs use cm_alloc_priv_msg_rep() directly.
 */
static struct ib_mad_send_buf *
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
{
	return cm_alloc_priv_msg_rep(cm_id_priv, state, false);
}
static void cm_free_priv_msg(struct ib_mad_send_buf *msg)
{
struct cm_id_private *cm_id_priv = msg->context[0];
@@ -2295,7 +2310,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
goto out;
}
msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REP_SENT);
msg = cm_alloc_priv_msg_rep(cm_id_priv, IB_CM_REP_SENT, true);
if (IS_ERR(msg)) {
ret = PTR_ERR(msg);
goto out;
@@ -4380,9 +4395,22 @@ static int cm_add_one(struct ib_device *ib_device)
goto error2;
}
port->rep_agent = ib_register_mad_agent(ib_device, i,
IB_QPT_GSI,
NULL,
0,
cm_send_handler,
NULL,
port,
0);
if (IS_ERR(port->rep_agent)) {
ret = PTR_ERR(port->rep_agent);
goto error3;
}
ret = ib_modify_port(ib_device, i, 0, &port_modify);
if (ret)
goto error3;
goto error4;
count++;
}
@@ -4397,6 +4425,8 @@ static int cm_add_one(struct ib_device *ib_device)
write_unlock_irqrestore(&cm.device_lock, flags);
return 0;
error4:
ib_unregister_mad_agent(port->rep_agent);
error3:
ib_unregister_mad_agent(port->mad_agent);
error2:
@@ -4410,6 +4440,7 @@ error1:
port = cm_dev->port[i-1];
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
ib_unregister_mad_agent(port->rep_agent);
ib_unregister_mad_agent(port->mad_agent);
ib_port_unregister_client_groups(ib_device, i,
cm_counter_groups);
@@ -4439,12 +4470,14 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
rdma_for_each_port (ib_device, i) {
struct ib_mad_agent *mad_agent;
struct ib_mad_agent *rep_agent;
if (!rdma_cap_ib_cm(ib_device, i))
continue;
port = cm_dev->port[i-1];
mad_agent = port->mad_agent;
rep_agent = port->rep_agent;
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
/*
* We flush the queue here after the going_down set, this
@@ -4458,8 +4491,10 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
*/
write_lock(&cm_dev->mad_agent_lock);
port->mad_agent = NULL;
port->rep_agent = NULL;
write_unlock(&cm_dev->mad_agent_lock);
ib_unregister_mad_agent(mad_agent);
ib_unregister_mad_agent(rep_agent);
ib_port_unregister_client_groups(ib_device, i,
cm_counter_groups);
}

View File

@@ -461,7 +461,7 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
return NULL;
qp = container_of(res, struct ib_qp, res);
if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
if (qp->qp_type == IB_QPT_RAW_PACKET && !rdma_dev_has_raw_cap(dev))
goto err;
return qp;

View File

@@ -317,13 +317,18 @@ EXPORT_SYMBOL(__ib_alloc_cq_any);
*/
void ib_free_cq(struct ib_cq *cq)
{
int ret;
int ret = 0;
if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
return;
if (WARN_ON_ONCE(cq->cqe_used))
return;
if (cq->device->ops.pre_destroy_cq) {
ret = cq->device->ops.pre_destroy_cq(cq);
WARN_ONCE(ret, "Disable of kernel CQ shouldn't fail");
}
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
break;
@@ -340,7 +345,10 @@ void ib_free_cq(struct ib_cq *cq)
rdma_dim_destroy(cq);
trace_cq_free(cq);
ret = cq->device->ops.destroy_cq(cq, NULL);
if (cq->device->ops.post_destroy_cq)
cq->device->ops.post_destroy_cq(cq);
else
ret = cq->device->ops.destroy_cq(cq, NULL);
WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
rdma_restrack_del(&cq->res);
kfree(cq->wc);

View File

@@ -145,6 +145,33 @@ bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
}
EXPORT_SYMBOL(rdma_dev_access_netns);
/**
 * rdma_dev_has_raw_cap() - Returns whether a specified rdma device has
 * CAP_NET_RAW capability or not.
 *
 * @dev: Pointer to the rdma device whose capability is to be checked
 *
 * Returns true if the rdma device's owning user namespace has CAP_NET_RAW
 * capability, otherwise false. When the rdma subsystem is in legacy shared
 * network namespace mode, the default net namespace is considered.
 */
bool rdma_dev_has_raw_cap(const struct ib_device *dev)
{
	const struct net *net;

	/* The network namespace is the resource whose user namespace is to
	 * be considered. When in shared mode, there is no reliable network
	 * namespace resource, so consider the default net namespace.
	 */
	if (ib_devices_shared_netns)
		net = &init_net;
	else
		net = read_pnet(&dev->coredev.rdma_net);

	return ns_capable(net->user_ns, CAP_NET_RAW);
}
EXPORT_SYMBOL(rdma_dev_has_raw_cap);
/*
* xarray has this behavior where it won't iterate over NULL values stored in
* allocated arrays. So we need our own iterator to see all values stored in
@@ -557,6 +584,8 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
/**
* _ib_alloc_device - allocate an IB device struct
* @size:size of structure to allocate
* @net: network namespace device should be located in, namespace
* must stay valid until ib_register_device() is completed.
*
* Low-level drivers should use ib_alloc_device() to allocate &struct
* ib_device. @size is the size of the structure to be allocated,
@@ -564,7 +593,7 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
* ib_dealloc_device() must be used to free structures allocated with
* ib_alloc_device().
*/
struct ib_device *_ib_alloc_device(size_t size)
struct ib_device *_ib_alloc_device(size_t size, struct net *net)
{
struct ib_device *device;
unsigned int i;
@@ -581,7 +610,15 @@ struct ib_device *_ib_alloc_device(size_t size)
return NULL;
}
rdma_init_coredev(&device->coredev, device, &init_net);
/* ib_devices_shared_netns can't change while we have active namespaces
* in the system which means either init_net is passed or the user has
* no idea what they are doing.
*
* To avoid breaking backward compatibility, when in shared mode,
* force to init the device in the init_net.
*/
net = ib_devices_shared_netns ? &init_net : net;
rdma_init_coredev(&device->coredev, device, net);
INIT_LIST_HEAD(&device->event_handler_list);
spin_lock_init(&device->qp_open_list_lock);
@@ -2671,6 +2708,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, add_sub_dev);
SET_DEVICE_OP(dev_ops, advise_mr);
SET_DEVICE_OP(dev_ops, alloc_dm);
SET_DEVICE_OP(dev_ops, alloc_dmah);
SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
SET_DEVICE_OP(dev_ops, alloc_hw_port_stats);
SET_DEVICE_OP(dev_ops, alloc_mr);
@@ -2691,6 +2729,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, create_ah);
SET_DEVICE_OP(dev_ops, create_counters);
SET_DEVICE_OP(dev_ops, create_cq);
SET_DEVICE_OP(dev_ops, create_cq_umem);
SET_DEVICE_OP(dev_ops, create_flow);
SET_DEVICE_OP(dev_ops, create_qp);
SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
@@ -2698,6 +2737,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, create_user_ah);
SET_DEVICE_OP(dev_ops, create_wq);
SET_DEVICE_OP(dev_ops, dealloc_dm);
SET_DEVICE_OP(dev_ops, dealloc_dmah);
SET_DEVICE_OP(dev_ops, dealloc_driver);
SET_DEVICE_OP(dev_ops, dealloc_mw);
SET_DEVICE_OP(dev_ops, dealloc_pd);
@@ -2763,8 +2803,10 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, modify_srq);
SET_DEVICE_OP(dev_ops, modify_wq);
SET_DEVICE_OP(dev_ops, peek_cq);
SET_DEVICE_OP(dev_ops, pre_destroy_cq);
SET_DEVICE_OP(dev_ops, poll_cq);
SET_DEVICE_OP(dev_ops, port_groups);
SET_DEVICE_OP(dev_ops, post_destroy_cq);
SET_DEVICE_OP(dev_ops, post_recv);
SET_DEVICE_OP(dev_ops, post_send);
SET_DEVICE_OP(dev_ops, post_srq_recv);
@@ -2793,6 +2835,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_OBJ_SIZE(dev_ops, ib_ah);
SET_OBJ_SIZE(dev_ops, ib_counters);
SET_OBJ_SIZE(dev_ops, ib_cq);
SET_OBJ_SIZE(dev_ops, ib_dmah);
SET_OBJ_SIZE(dev_ops, ib_mw);
SET_OBJ_SIZE(dev_ops, ib_pd);
SET_OBJ_SIZE(dev_ops, ib_qp);

View File

@@ -210,6 +210,29 @@ int ib_response_mad(const struct ib_mad_hdr *hdr)
}
EXPORT_SYMBOL(ib_response_mad);
#define SOL_FC_MAX_DEFAULT_FRAC 4
#define SOL_FC_MAX_SA_FRAC 32
/*
 * Return the maximum number of outstanding solicited MADs allowed for an
 * agent before further sends are queued on the backlog list, derived as a
 * fraction of the receive queue size for the agent's management class.
 * Returns 0 (no flow-control limit) for classes not covered by the scheme.
 */
static int get_sol_fc_max_outstanding(struct ib_mad_reg_req *mad_reg_req)
{
	if (!mad_reg_req)
		/* Send only agent */
		return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC;

	switch (mad_reg_req->mgmt_class) {
	case IB_MGMT_CLASS_CM:
		return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC;
	case IB_MGMT_CLASS_SUBN_ADM:
		/* SA gets a much smaller share of the receive queue. */
		return mad_recvq_size / SOL_FC_MAX_SA_FRAC;
	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
		/* Cap at the default recv queue size for SMP classes. */
		return min(mad_recvq_size, IB_MAD_QP_RECV_SIZE) /
		       SOL_FC_MAX_DEFAULT_FRAC;
	default:
		return 0;
	}
}
/*
* ib_register_mad_agent - Register to send/receive MADs
*
@@ -391,13 +414,17 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
spin_lock_init(&mad_agent_priv->lock);
INIT_LIST_HEAD(&mad_agent_priv->send_list);
INIT_LIST_HEAD(&mad_agent_priv->wait_list);
INIT_LIST_HEAD(&mad_agent_priv->done_list);
INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
INIT_LIST_HEAD(&mad_agent_priv->backlog_list);
INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
INIT_LIST_HEAD(&mad_agent_priv->local_list);
INIT_WORK(&mad_agent_priv->local_work, local_completions);
refcount_set(&mad_agent_priv->refcount, 1);
init_completion(&mad_agent_priv->comp);
mad_agent_priv->sol_fc_send_count = 0;
mad_agent_priv->sol_fc_wait_count = 0;
mad_agent_priv->sol_fc_max =
recv_handler ? get_sol_fc_max_outstanding(mad_reg_req) : 0;
ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
if (ret2) {
@@ -1055,6 +1082,180 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
return ret;
}
/*
 * Move a MAD onto the flow-control backlog list.  Valid source states:
 * WAIT_RESP (requeued; stops counting against the FC wait budget) or
 * INIT (fresh MAD, not yet on any list).  Caller holds mad_agent_priv->lock.
 */
static void handle_queued_state(struct ib_mad_send_wr_private *mad_send_wr,
				struct ib_mad_agent_private *mad_agent_priv)
{
	if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) {
		/* Leaving the wait list: drop from the FC wait count. */
		mad_agent_priv->sol_fc_wait_count--;
		list_move_tail(&mad_send_wr->agent_list,
			       &mad_agent_priv->backlog_list);
	} else {
		expect_mad_state(mad_send_wr, IB_MAD_STATE_INIT);
		list_add_tail(&mad_send_wr->agent_list,
			      &mad_agent_priv->backlog_list);
	}
}
/*
 * Move a MAD onto the send list as it is posted (or re-posted) to the QP.
 * Valid source states: INIT (first post), WAIT_RESP or QUEUED (retry /
 * released from backlog).  Updates the solicited flow-control counters
 * accordingly.  Caller holds mad_agent_priv->lock.
 */
static void handle_send_state(struct ib_mad_send_wr_private *mad_send_wr,
			      struct ib_mad_agent_private *mad_agent_priv)
{
	if (mad_send_wr->state == IB_MAD_STATE_INIT) {
		/* Fresh MAD: not on any list yet. */
		list_add_tail(&mad_send_wr->agent_list,
			      &mad_agent_priv->send_list);
	} else {
		expect_mad_state2(mad_send_wr, IB_MAD_STATE_WAIT_RESP,
				  IB_MAD_STATE_QUEUED);
		list_move_tail(&mad_send_wr->agent_list,
			       &mad_agent_priv->send_list);
	}

	if (mad_send_wr->is_solicited_fc) {
		/* Shift the FC accounting from "waiting" to "sending". */
		if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
			mad_agent_priv->sol_fc_wait_count--;
		mad_agent_priv->sol_fc_send_count++;
	}
}
/*
 * Insert a MAD into the wait list, kept sorted by absolute timeout
 * (earliest at the head).  mad_send_wr->timeout is converted here from a
 * relative delay to an absolute jiffies deadline.  Entries with a zero
 * delay are placed at the head of the list.  Valid source states:
 * SEND_START, WAIT_RESP, CANCELED.  Caller holds mad_agent_priv->lock.
 */
static void handle_wait_state(struct ib_mad_send_wr_private *mad_send_wr,
			      struct ib_mad_agent_private *mad_agent_priv)
{
	struct ib_mad_send_wr_private *temp_mad_send_wr;
	struct list_head *list_item;
	unsigned long delay;

	expect_mad_state3(mad_send_wr, IB_MAD_STATE_SEND_START,
			  IB_MAD_STATE_WAIT_RESP, IB_MAD_STATE_CANCELED);
	if (mad_send_wr->state == IB_MAD_STATE_SEND_START &&
	    mad_send_wr->is_solicited_fc) {
		/* Send completed: FC accounting moves to "waiting". */
		mad_agent_priv->sol_fc_send_count--;
		mad_agent_priv->sol_fc_wait_count++;
	}

	list_del_init(&mad_send_wr->agent_list);
	delay = mad_send_wr->timeout;
	mad_send_wr->timeout += jiffies;

	if (delay) {
		/* Scan from the tail for the first earlier deadline. */
		list_for_each_prev(list_item,
				   &mad_agent_priv->wait_list) {
			temp_mad_send_wr = list_entry(
				list_item,
				struct ib_mad_send_wr_private,
				agent_list);
			if (time_after(mad_send_wr->timeout,
				       temp_mad_send_wr->timeout))
				break;
		}
	} else {
		/* Zero delay: insert at the head of the wait list. */
		list_item = &mad_agent_priv->wait_list;
	}

	list_add(&mad_send_wr->agent_list, list_item);
}
/*
 * A response arrived before the send completion notification.  Only valid
 * from SEND_START.  The bool-to-int arithmetic decrements the FC send
 * count only for solicited flow-controlled MADs.
 */
static void handle_early_resp_state(struct ib_mad_send_wr_private *mad_send_wr,
				    struct ib_mad_agent_private *mad_agent_priv)
{
	expect_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
	mad_agent_priv->sol_fc_send_count -= mad_send_wr->is_solicited_fc;
}
/*
 * Cancel a MAD: release its solicited flow-control budget depending on
 * whether it was sending or waiting.  A DONE MAD must never be canceled.
 * The MAD stays on its current list; removal happens on the DONE
 * transition.  Caller holds mad_agent_priv->lock.
 */
static void handle_canceled_state(struct ib_mad_send_wr_private *mad_send_wr,
				  struct ib_mad_agent_private *mad_agent_priv)
{
	not_expect_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
	if (mad_send_wr->is_solicited_fc) {
		if (mad_send_wr->state == IB_MAD_STATE_SEND_START)
			mad_agent_priv->sol_fc_send_count--;
		else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
			mad_agent_priv->sol_fc_wait_count--;
	}
}
/*
 * Terminal transition: release any solicited flow-control budget still
 * held and remove the MAD from whichever agent list it is on.  Caller
 * holds mad_agent_priv->lock.
 */
static void handle_done_state(struct ib_mad_send_wr_private *mad_send_wr,
			      struct ib_mad_agent_private *mad_agent_priv)
{
	if (mad_send_wr->is_solicited_fc) {
		if (mad_send_wr->state == IB_MAD_STATE_SEND_START)
			mad_agent_priv->sol_fc_send_count--;
		else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
			mad_agent_priv->sol_fc_wait_count--;
	}
	list_del_init(&mad_send_wr->agent_list);
}
/*
 * Drive the MAD send state machine: perform the list moves and
 * flow-control accounting for the transition into @new_state, then
 * record the new state.  Caller holds mad_agent_priv->lock.
 *
 * Special case: a CANCELED MAD transitioning to WAIT_RESP is still
 * inserted into the wait list (so the timeout path will reap it) but
 * keeps the CANCELED state — hence the early return.
 */
void change_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
		      enum ib_mad_state new_state)
{
	struct ib_mad_agent_private *mad_agent_priv =
		mad_send_wr->mad_agent_priv;

	switch (new_state) {
	case IB_MAD_STATE_INIT:
		/* No list membership or accounting yet. */
		break;
	case IB_MAD_STATE_QUEUED:
		handle_queued_state(mad_send_wr, mad_agent_priv);
		break;
	case IB_MAD_STATE_SEND_START:
		handle_send_state(mad_send_wr, mad_agent_priv);
		break;
	case IB_MAD_STATE_WAIT_RESP:
		handle_wait_state(mad_send_wr, mad_agent_priv);
		if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
			return;
		break;
	case IB_MAD_STATE_EARLY_RESP:
		handle_early_resp_state(mad_send_wr, mad_agent_priv);
		break;
	case IB_MAD_STATE_CANCELED:
		handle_canceled_state(mad_send_wr, mad_agent_priv);
		break;
	case IB_MAD_STATE_DONE:
		handle_done_state(mad_send_wr, mad_agent_priv);
		break;
	}

	mad_send_wr->state = new_state;
}
static bool is_solicited_fc_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_mad *rmpp_mad;
u8 mgmt_class;
if (!mad_send_wr->timeout)
return 0;
rmpp_mad = mad_send_wr->send_buf.mad;
if (mad_send_wr->mad_agent_priv->agent.rmpp_version &&
(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE))
return 0;
mgmt_class =
((struct ib_mad_hdr *)mad_send_wr->send_buf.mad)->mgmt_class;
return mgmt_class == IB_MGMT_CLASS_CM ||
mgmt_class == IB_MGMT_CLASS_SUBN_ADM ||
mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE;
}
/*
 * Decide whether a new solicited flow-controlled MAD must be queued on
 * the backlog list instead of being sent immediately: either the backlog
 * is already non-empty (preserve FIFO ordering) or the outstanding
 * (sending + waiting) count has reached the agent's FC limit.
 * Caller holds mad_agent_priv->lock.
 */
static bool mad_is_for_backlog(struct ib_mad_send_wr_private *mad_send_wr)
{
	struct ib_mad_agent_private *mad_agent_priv =
		mad_send_wr->mad_agent_priv;

	/* Only solicited FC MADs on FC-limited agents are ever queued. */
	if (!mad_send_wr->is_solicited_fc || !mad_agent_priv->sol_fc_max)
		return false;

	if (!list_empty(&mad_agent_priv->backlog_list))
		return true;

	return mad_agent_priv->sol_fc_send_count +
		       mad_agent_priv->sol_fc_wait_count >=
	       mad_agent_priv->sol_fc_max;
}
/*
* ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
* with the registered client
@@ -1080,9 +1281,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
if (ret)
goto error;
if (!send_buf->mad_agent->send_handler ||
(send_buf->timeout_ms &&
!send_buf->mad_agent->recv_handler)) {
if (!send_buf->mad_agent->send_handler) {
ret = -EINVAL;
goto error;
}
@@ -1118,15 +1317,19 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
mad_send_wr->max_retries = send_buf->retries;
mad_send_wr->retries_left = send_buf->retries;
send_buf->retries = 0;
/* Reference for work request to QP + response */
mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
mad_send_wr->status = IB_WC_SUCCESS;
change_mad_state(mad_send_wr, IB_MAD_STATE_INIT);
/* Reference MAD agent until send completes */
refcount_inc(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_add_tail(&mad_send_wr->agent_list,
&mad_agent_priv->send_list);
mad_send_wr->is_solicited_fc = is_solicited_fc_mad(mad_send_wr);
if (mad_is_for_backlog(mad_send_wr)) {
change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
return 0;
}
change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
@@ -1138,7 +1341,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
if (ret < 0) {
/* Fail send request */
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_del(&mad_send_wr->agent_list);
change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
deref_mad_agent(mad_agent_priv);
goto error;
@@ -1746,7 +1949,19 @@ ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
*/
(is_direct(mad_hdr->mgmt_class) ||
rcv_has_same_gid(mad_agent_priv, wr, wc)))
return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
}
list_for_each_entry(wr, &mad_agent_priv->backlog_list, agent_list) {
if ((wr->tid == mad_hdr->tid) &&
rcv_has_same_class(wr, wc) &&
/*
* Don't check GID for direct routed MADs.
* These might have permissive LIDs.
*/
(is_direct(mad_hdr->mgmt_class) ||
rcv_has_same_gid(mad_agent_priv, wr, wc)))
return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
}
/*
@@ -1765,17 +1980,55 @@ ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
(is_direct(mad_hdr->mgmt_class) ||
rcv_has_same_gid(mad_agent_priv, wr, wc)))
/* Verify request has not been canceled */
return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
}
return NULL;
}
/*
 * Drain the flow-control backlog: while there is spare FC budget, move
 * the oldest queued MAD to SEND_START and post it.  The agent lock is
 * dropped around ib_send_mad() (it may sleep/post to the QP) and
 * reacquired for the next iteration.  On a post failure the MAD is
 * completed to the client with IB_WC_LOC_QP_OP_ERR.
 */
static void
process_backlog_mads(struct ib_mad_agent_private *mad_agent_priv)
{
	struct ib_mad_send_wr_private *mad_send_wr;
	struct ib_mad_send_wc mad_send_wc = {};
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	while (!list_empty(&mad_agent_priv->backlog_list) &&
	       (mad_agent_priv->sol_fc_send_count +
			mad_agent_priv->sol_fc_wait_count <
		mad_agent_priv->sol_fc_max)) {
		mad_send_wr = list_entry(mad_agent_priv->backlog_list.next,
					 struct ib_mad_send_wr_private,
					 agent_list);
		change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
		/* Drop the lock to post; state already claims FC budget. */
		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
		ret = ib_send_mad(mad_send_wr);
		if (ret) {
			spin_lock_irqsave(&mad_agent_priv->lock, flags);
			deref_mad_agent(mad_agent_priv);
			change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
			/* Report the failed send to the client. */
			mad_send_wc.send_buf = &mad_send_wr->send_buf;
			mad_send_wc.status = IB_WC_LOC_QP_OP_ERR;
			mad_agent_priv->agent.send_handler(
				&mad_agent_priv->agent, &mad_send_wc);
		}
		spin_lock_irqsave(&mad_agent_priv->lock, flags);
	}
	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
{
mad_send_wr->timeout = 0;
if (mad_send_wr->refcount == 1)
list_move_tail(&mad_send_wr->agent_list,
&mad_send_wr->mad_agent_priv->done_list);
if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP ||
mad_send_wr->state == IB_MAD_STATE_QUEUED)
change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
else
change_mad_state(mad_send_wr, IB_MAD_STATE_EARLY_RESP);
}
static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
@@ -1784,6 +2037,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_mad_send_wc mad_send_wc;
unsigned long flags;
bool is_mad_done;
int ret;
INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
@@ -1832,6 +2086,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
}
} else {
ib_mark_mad_done(mad_send_wr);
is_mad_done = (mad_send_wr->state == IB_MAD_STATE_DONE);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Defined behavior is to complete response before request */
@@ -1841,10 +2096,13 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_recv_wc);
deref_mad_agent(mad_agent_priv);
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
mad_send_wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (is_mad_done) {
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
mad_send_wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr,
&mad_send_wc);
}
}
} else {
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
@@ -2172,30 +2430,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *temp_mad_send_wr;
struct list_head *list_item;
unsigned long delay;
mad_agent_priv = mad_send_wr->mad_agent_priv;
list_del(&mad_send_wr->agent_list);
delay = mad_send_wr->timeout;
mad_send_wr->timeout += jiffies;
if (delay) {
list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
temp_mad_send_wr = list_entry(list_item,
struct ib_mad_send_wr_private,
agent_list);
if (time_after(mad_send_wr->timeout,
temp_mad_send_wr->timeout))
break;
}
} else {
list_item = &mad_agent_priv->wait_list;
}
list_add(&mad_send_wr->agent_list, list_item);
change_mad_state(mad_send_wr, IB_MAD_STATE_WAIT_RESP);
/* Reschedule a work item if we have a shorter timeout */
if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
@@ -2229,32 +2468,28 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
} else
ret = IB_RMPP_RESULT_UNHANDLED;
if (mad_send_wc->status != IB_WC_SUCCESS &&
mad_send_wr->status == IB_WC_SUCCESS) {
mad_send_wr->status = mad_send_wc->status;
mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
}
if (--mad_send_wr->refcount > 0) {
if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
mad_send_wr->status == IB_WC_SUCCESS) {
wait_for_response(mad_send_wr);
}
if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
mad_send_wc->status = IB_WC_WR_FLUSH_ERR;
else if (mad_send_wr->state == IB_MAD_STATE_SEND_START &&
mad_send_wr->timeout) {
wait_for_response(mad_send_wr);
goto done;
}
/* Remove send from MAD agent and notify client of completion */
list_del(&mad_send_wr->agent_list);
if (mad_send_wr->state != IB_MAD_STATE_DONE)
change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
adjust_timeout(mad_agent_priv);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
if (mad_send_wr->status != IB_WC_SUCCESS)
mad_send_wc->status = mad_send_wr->status;
if (ret == IB_RMPP_RESULT_INTERNAL)
if (ret == IB_RMPP_RESULT_INTERNAL) {
ib_rmpp_send_handler(mad_send_wc);
else
} else {
if (mad_send_wr->is_solicited_fc)
process_backlog_mads(mad_agent_priv);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
mad_send_wc);
}
/* Release reference on agent taken when sending */
deref_mad_agent(mad_agent_priv);
@@ -2396,40 +2631,53 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
return true;
}
/*
 * Complete every MAD on @list to the client with @wc_status and drop the
 * agent reference taken at send time.  Must be called without the agent
 * lock held (it invokes the client's send_handler).
 */
static void clear_mad_error_list(struct list_head *list,
				 enum ib_wc_status wc_status,
				 struct ib_mad_agent_private *mad_agent_priv)
{
	struct ib_mad_send_wr_private *mad_send_wr, *n;
	struct ib_mad_send_wc mad_send_wc;

	mad_send_wc.status = wc_status;
	mad_send_wc.vendor_err = 0;

	/* _safe: the handler/deref may free the entry. */
	list_for_each_entry_safe(mad_send_wr, n, list, agent_list) {
		mad_send_wc.send_buf = &mad_send_wr->send_buf;
		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
						   &mad_send_wc);
		deref_mad_agent(mad_agent_priv);
	}
}
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
{
unsigned long flags;
struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
struct ib_mad_send_wc mad_send_wc;
struct list_head cancel_list;
INIT_LIST_HEAD(&cancel_list);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
&mad_agent_priv->send_list, agent_list) {
if (mad_send_wr->status == IB_WC_SUCCESS) {
mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
}
&mad_agent_priv->send_list, agent_list)
change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED);
/* Empty wait & backlog list to prevent receives from finding request */
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
&mad_agent_priv->wait_list, agent_list) {
change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
list_add_tail(&mad_send_wr->agent_list, &cancel_list);
}
/* Empty wait list to prevent receives from finding a request */
list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Report all cancelled requests */
mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
mad_send_wc.vendor_err = 0;
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
&cancel_list, agent_list) {
mad_send_wc.send_buf = &mad_send_wr->send_buf;
list_del(&mad_send_wr->agent_list);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
deref_mad_agent(mad_agent_priv);
&mad_agent_priv->backlog_list, agent_list) {
change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
list_add_tail(&mad_send_wr->agent_list, &cancel_list);
}
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Report all cancelled requests */
clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv);
}
static struct ib_mad_send_wr_private*
@@ -2451,6 +2699,13 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
&mad_send_wr->send_buf == send_buf)
return mad_send_wr;
}
list_for_each_entry(mad_send_wr, &mad_agent_priv->backlog_list,
agent_list) {
if (&mad_send_wr->send_buf == send_buf)
return mad_send_wr;
}
return NULL;
}
@@ -2468,16 +2723,16 @@ int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
struct ib_mad_agent_private, agent);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
if (!mad_send_wr || mad_send_wr->state == IB_MAD_STATE_CANCELED) {
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
return -EINVAL;
}
active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
if (!timeout_ms) {
mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
}
active = ((mad_send_wr->state == IB_MAD_STATE_SEND_START) ||
(mad_send_wr->state == IB_MAD_STATE_EARLY_RESP) ||
(mad_send_wr->state == IB_MAD_STATE_QUEUED && timeout_ms));
if (!timeout_ms)
change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED);
mad_send_wr->send_buf.timeout_ms = timeout_ms;
if (active)
@@ -2589,6 +2844,11 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->send_buf.retries++;
mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
if (mad_send_wr->is_solicited_fc &&
!list_empty(&mad_send_wr->mad_agent_priv->backlog_list)) {
change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED);
return 0;
}
if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
ret = ib_retry_rmpp(mad_send_wr);
@@ -2606,26 +2866,25 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
} else
ret = ib_send_mad(mad_send_wr);
if (!ret) {
mad_send_wr->refcount++;
list_add_tail(&mad_send_wr->agent_list,
&mad_send_wr->mad_agent_priv->send_list);
}
if (!ret)
change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
return ret;
}
static void timeout_sends(struct work_struct *work)
{
struct ib_mad_send_wr_private *mad_send_wr, *n;
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wc mad_send_wc;
struct list_head local_list;
struct list_head timeout_list;
struct list_head cancel_list;
struct list_head *list_item;
unsigned long flags, delay;
mad_agent_priv = container_of(work, struct ib_mad_agent_private,
timed_work.work);
mad_send_wc.vendor_err = 0;
INIT_LIST_HEAD(&local_list);
INIT_LIST_HEAD(&timeout_list);
INIT_LIST_HEAD(&cancel_list);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
while (!list_empty(&mad_agent_priv->wait_list)) {
@@ -2643,25 +2902,22 @@ static void timeout_sends(struct work_struct *work)
break;
}
list_del_init(&mad_send_wr->agent_list);
if (mad_send_wr->status == IB_WC_SUCCESS &&
!retry_send(mad_send_wr))
if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
list_item = &cancel_list;
else if (retry_send(mad_send_wr))
list_item = &timeout_list;
else
continue;
list_add_tail(&mad_send_wr->agent_list, &local_list);
change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
list_add_tail(&mad_send_wr->agent_list, list_item);
}
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
list_for_each_entry_safe(mad_send_wr, n, &local_list, agent_list) {
if (mad_send_wr->status == IB_WC_SUCCESS)
mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
else
mad_send_wc.status = mad_send_wr->status;
mad_send_wc.send_buf = &mad_send_wr->send_buf;
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
deref_mad_agent(mad_agent_priv);
}
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
process_backlog_mads(mad_agent_priv);
clear_mad_error_list(&timeout_list, IB_WC_RESP_TIMEOUT_ERR,
mad_agent_priv);
clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv);
}
/*

View File

@@ -95,13 +95,16 @@ struct ib_mad_agent_private {
spinlock_t lock;
struct list_head send_list;
unsigned int sol_fc_send_count;
struct list_head wait_list;
struct list_head done_list;
unsigned int sol_fc_wait_count;
struct delayed_work timed_work;
unsigned long timeout;
struct list_head local_list;
struct work_struct local_work;
struct list_head rmpp_list;
unsigned int sol_fc_max;
struct list_head backlog_list;
refcount_t refcount;
union {
@@ -118,6 +121,32 @@ struct ib_mad_snoop_private {
struct completion comp;
};
/*
 * Lifecycle states of a MAD send work request.  Each state's comment
 * records which mad_agent list (backlog, send, wait — or none) the
 * request occupies while in that state.
 */
enum ib_mad_state {
	/* MAD is in the making and is not yet in any list */
	IB_MAD_STATE_INIT,
	/* MAD is in backlog list */
	IB_MAD_STATE_QUEUED,
	/*
	 * MAD was sent to the QP and is waiting for completion
	 * notification in send list.
	 */
	IB_MAD_STATE_SEND_START,
	/*
	 * MAD send completed successfully, waiting for a response
	 * in wait list.
	 */
	IB_MAD_STATE_WAIT_RESP,
	/*
	 * Response came early, before send completion notification,
	 * in send list.
	 */
	IB_MAD_STATE_EARLY_RESP,
	/* MAD was canceled while in wait or send list */
	IB_MAD_STATE_CANCELED,
	/* MAD processing completed, MAD in no list */
	IB_MAD_STATE_DONE
};
struct ib_mad_send_wr_private {
struct ib_mad_list_head mad_list;
struct list_head agent_list;
@@ -132,8 +161,6 @@ struct ib_mad_send_wr_private {
int max_retries;
int retries_left;
int retry;
int refcount;
enum ib_wc_status status;
/* RMPP control */
struct list_head rmpp_list;
@@ -143,8 +170,48 @@ struct ib_mad_send_wr_private {
int seg_num;
int newwin;
int pad;
enum ib_mad_state state;
/* Solicited MAD flow control */
bool is_solicited_fc;
};
/* Warn (lockdep builds only) when the MAD is not in @expected_state. */
static inline void expect_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
				    enum ib_mad_state expected_state)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	WARN_ON(mad_send_wr->state != expected_state);
}
/* Warn (lockdep builds only) when the MAD is in neither expected state. */
static inline void expect_mad_state2(struct ib_mad_send_wr_private *mad_send_wr,
				     enum ib_mad_state expected_state1,
				     enum ib_mad_state expected_state2)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	WARN_ON(mad_send_wr->state != expected_state1 &&
		mad_send_wr->state != expected_state2);
}
/* Warn (lockdep builds only) when the MAD is in none of the three states. */
static inline void expect_mad_state3(struct ib_mad_send_wr_private *mad_send_wr,
				     enum ib_mad_state expected_state1,
				     enum ib_mad_state expected_state2,
				     enum ib_mad_state expected_state3)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	WARN_ON(mad_send_wr->state != expected_state1 &&
		mad_send_wr->state != expected_state2 &&
		mad_send_wr->state != expected_state3);
}
/* Warn (lockdep builds only) when the MAD *is* in the forbidden state. */
static inline void
not_expect_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
		     enum ib_mad_state wrong_state)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	WARN_ON(mad_send_wr->state == wrong_state);
}
struct ib_mad_local_private {
struct list_head completion_list;
struct ib_mad_private *mad_priv;
@@ -222,4 +289,7 @@ void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
unsigned long timeout_ms);
void change_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
enum ib_mad_state new_state);
#endif /* __IB_MAD_PRIV_H__ */

View File

@@ -608,16 +608,20 @@ static void abort_send(struct ib_mad_agent_private *agent,
goto out; /* Unmatched send */
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
(!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
(!mad_send_wr->timeout) ||
(mad_send_wr->state == IB_MAD_STATE_CANCELED))
goto out; /* Send is already done */
ib_mark_mad_done(mad_send_wr);
if (mad_send_wr->state == IB_MAD_STATE_DONE) {
spin_unlock_irqrestore(&agent->lock, flags);
wc.status = IB_WC_REM_ABORT_ERR;
wc.vendor_err = rmpp_status;
wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
}
spin_unlock_irqrestore(&agent->lock, flags);
wc.status = IB_WC_REM_ABORT_ERR;
wc.vendor_err = rmpp_status;
wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
out:
spin_unlock_irqrestore(&agent->lock, flags);
@@ -684,7 +688,8 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
}
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
(!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
(!mad_send_wr->timeout) ||
(mad_send_wr->state == IB_MAD_STATE_CANCELED))
goto out; /* Send is already done */
if (seg_num > mad_send_wr->send_buf.seg_count ||
@@ -709,21 +714,24 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
struct ib_mad_send_wc wc;
ib_mark_mad_done(mad_send_wr);
if (mad_send_wr->state == IB_MAD_STATE_DONE) {
spin_unlock_irqrestore(&agent->lock, flags);
wc.status = IB_WC_SUCCESS;
wc.vendor_err = 0;
wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
}
spin_unlock_irqrestore(&agent->lock, flags);
wc.status = IB_WC_SUCCESS;
wc.vendor_err = 0;
wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
}
if (mad_send_wr->refcount == 1)
if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
ib_reset_mad_timeout(mad_send_wr,
mad_send_wr->send_buf.timeout_ms);
spin_unlock_irqrestore(&agent->lock, flags);
ack_ds_ack(agent, mad_recv_wc);
return;
} else if (mad_send_wr->refcount == 1 &&
} else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP &&
mad_send_wr->seg_num < mad_send_wr->newwin &&
mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
/* Send failure will just result in a timeout/retry */
@@ -731,7 +739,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
if (ret)
goto out;
mad_send_wr->refcount++;
change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
list_move_tail(&mad_send_wr->agent_list,
&mad_send_wr->mad_agent_priv->send_list);
}
@@ -890,7 +898,6 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->newwin = init_newwin(mad_send_wr);
/* We need to wait for the final ACK even if there isn't a response */
mad_send_wr->refcount += (mad_send_wr->timeout == 0);
ret = send_next_seg(mad_send_wr);
if (!ret)
return IB_RMPP_RESULT_CONSUMED;
@@ -912,7 +919,7 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */
if (mad_send_wc->status != IB_WC_SUCCESS ||
mad_send_wr->status != IB_WC_SUCCESS)
mad_send_wr->state == IB_MAD_STATE_CANCELED)
return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */
if (!mad_send_wr->timeout)

View File

@@ -255,7 +255,7 @@ EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
bool rdma_nl_get_privileged_qkey(void)
{
return privileged_qkey || capable(CAP_NET_RAW);
return privileged_qkey;
}
EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
@@ -1469,10 +1469,11 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
};
static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack,
enum rdma_restrack_type res_type,
res_fill_func_t fill_func)
static noinline_for_stack int
res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack,
enum rdma_restrack_type res_type,
res_fill_func_t fill_func)
{
const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
@@ -2263,10 +2264,10 @@ err:
return ret;
}
static int stat_get_doit_default_counter(struct sk_buff *skb,
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack,
struct nlattr *tb[])
static noinline_for_stack int
stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack,
struct nlattr *tb[])
{
struct rdma_hw_stats *stats;
struct nlattr *table_attr;
@@ -2356,8 +2357,9 @@ err:
return ret;
}
static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack, struct nlattr *tb[])
static noinline_for_stack int
stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack, struct nlattr *tb[])
{
static enum rdma_nl_counter_mode mode;

View File

@@ -1019,3 +1019,32 @@ void uverbs_finalize_object(struct ib_uobject *uobj,
WARN_ON(true);
}
}
/**
 * rdma_uattrs_has_raw_cap() - Test for CAP_NET_RAW on the rdma device
 *                             linked to a uverbs attribute bundle.
 * @attrs: Pointer to uverbs attributes
 *
 * Return: true when the rdma device's owning user namespace holds
 * CAP_NET_RAW, false otherwise — including when no ucontext can be
 * obtained (e.g. the device is being disassociated).
 */
bool rdma_uattrs_has_raw_cap(const struct uverbs_attr_bundle *attrs)
{
	struct ib_uverbs_file *ufile = attrs->ufile;
	struct ib_ucontext *uctx;
	bool raw_cap = false;
	int idx;

	/* Hold SRCU so the device cannot be disassociated while we look */
	idx = srcu_read_lock(&ufile->device->disassociate_srcu);
	uctx = ib_uverbs_get_ucontext_file(ufile);
	if (!IS_ERR(uctx))
		raw_cap = rdma_dev_has_raw_cap(uctx->device);
	srcu_read_unlock(&ufile->device->disassociate_srcu, idx);

	return raw_cap;
}
EXPORT_SYMBOL(rdma_uattrs_has_raw_cap);

View File

@@ -156,6 +156,7 @@ extern const struct uapi_definition uverbs_def_obj_counters[];
extern const struct uapi_definition uverbs_def_obj_cq[];
extern const struct uapi_definition uverbs_def_obj_device[];
extern const struct uapi_definition uverbs_def_obj_dm[];
extern const struct uapi_definition uverbs_def_obj_dmah[];
extern const struct uapi_definition uverbs_def_obj_flow_action[];
extern const struct uapi_definition uverbs_def_obj_intf[];
extern const struct uapi_definition uverbs_def_obj_mr[];

View File

@@ -100,6 +100,8 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
return container_of(res, struct rdma_counter, res)->device;
case RDMA_RESTRACK_SRQ:
return container_of(res, struct ib_srq, res)->device;
case RDMA_RESTRACK_DMAH:
return container_of(res, struct ib_dmah, res)->device;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;

View File

@@ -741,7 +741,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
}
mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags,
cmd.access_flags, NULL,
&attrs->driver_udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
@@ -1312,9 +1312,9 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
switch (cmd->qp_type) {
case IB_QPT_RAW_PACKET:
if (!capable(CAP_NET_RAW))
if (!rdma_uattrs_has_raw_cap(attrs))
return -EPERM;
break;
fallthrough;
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
@@ -1451,7 +1451,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
}
if (attr.create_flags & IB_QP_CREATE_SOURCE_QPN) {
if (!capable(CAP_NET_RAW)) {
if (!rdma_uattrs_has_raw_cap(attrs)) {
ret = -EPERM;
goto err_put;
}
@@ -1877,7 +1877,8 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
attr->path_mig_state = cmd->base.path_mig_state;
if (cmd->base.attr_mask & IB_QP_QKEY) {
if (cmd->base.qkey & IB_QP_SET_QKEY &&
!rdma_nl_get_privileged_qkey()) {
!(rdma_nl_get_privileged_qkey() ||
rdma_uattrs_has_raw_cap(attrs))) {
ret = -EPERM;
goto release_qp;
}
@@ -3225,7 +3226,7 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
if (cmd.comp_mask)
return -EINVAL;
if (!capable(CAP_NET_RAW))
if (!rdma_uattrs_has_raw_cap(attrs))
return -EPERM;
if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED)

View File

@@ -64,15 +64,21 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
struct ib_ucq_object *obj = container_of(
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
typeof(*obj), uevent.uobject);
struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_device *ib_dev = attrs->context->device;
int ret;
u64 user_handle;
struct ib_umem_dmabuf *umem_dmabuf;
struct ib_cq_init_attr attr = {};
struct ib_cq *cq;
struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_uobject *ev_file_uobj;
struct ib_umem *umem = NULL;
u64 buffer_length;
u64 buffer_offset;
struct ib_cq *cq;
u64 user_handle;
u64 buffer_va;
int buffer_fd;
int ret;
if (!ib_dev->ops.create_cq || !ib_dev->ops.destroy_cq)
if ((!ib_dev->ops.create_cq && !ib_dev->ops.create_cq_umem) || !ib_dev->ops.destroy_cq)
return -EOPNOTSUPP;
ret = uverbs_copy_from(&attr.comp_vector, attrs,
@@ -112,9 +118,66 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
INIT_LIST_HEAD(&obj->comp_list);
INIT_LIST_HEAD(&obj->uevent.event_list);
if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_VA)) {
ret = uverbs_copy_from(&buffer_va, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_VA);
if (ret)
goto err_event_file;
ret = uverbs_copy_from(&buffer_length, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH);
if (ret)
goto err_event_file;
if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_FD) ||
uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET) ||
!ib_dev->ops.create_cq_umem) {
ret = -EINVAL;
goto err_event_file;
}
umem = ib_umem_get(ib_dev, buffer_va, buffer_length, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem)) {
ret = PTR_ERR(umem);
goto err_event_file;
}
} else if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_FD)) {
ret = uverbs_get_raw_fd(&buffer_fd, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_FD);
if (ret)
goto err_event_file;
ret = uverbs_copy_from(&buffer_offset, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET);
if (ret)
goto err_event_file;
ret = uverbs_copy_from(&buffer_length, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH);
if (ret)
goto err_event_file;
if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_VA) ||
!ib_dev->ops.create_cq_umem) {
ret = -EINVAL;
goto err_event_file;
}
umem_dmabuf = ib_umem_dmabuf_get_pinned(ib_dev, buffer_offset, buffer_length,
buffer_fd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem_dmabuf)) {
ret = PTR_ERR(umem_dmabuf);
goto err_event_file;
}
umem = &umem_dmabuf->umem;
} else if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET) ||
uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH) ||
!ib_dev->ops.create_cq) {
ret = -EINVAL;
goto err_event_file;
}
cq = rdma_zalloc_drv_obj(ib_dev, ib_cq);
if (!cq) {
ret = -ENOMEM;
ib_umem_release(umem);
goto err_event_file;
}
@@ -128,7 +191,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
rdma_restrack_set_name(&cq->res, NULL);
ret = ib_dev->ops.create_cq(cq, &attr, attrs);
ret = umem ? ib_dev->ops.create_cq_umem(cq, &attr, umem, attrs) :
ib_dev->ops.create_cq(cq, &attr, attrs);
if (ret)
goto err_free;
@@ -180,6 +244,17 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_OBJECT_ASYNC_EVENT,
UVERBS_ACCESS_READ,
UA_OPTIONAL),
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_BUFFER_VA,
UVERBS_ATTR_TYPE(u64),
UA_OPTIONAL),
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH,
UVERBS_ATTR_TYPE(u64),
UA_OPTIONAL),
UVERBS_ATTR_RAW_FD(UVERBS_ATTR_CREATE_CQ_BUFFER_FD,
UA_OPTIONAL),
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET,
UVERBS_ATTR_TYPE(u64),
UA_OPTIONAL),
UVERBS_ATTR_UHW());
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(

View File

@@ -0,0 +1,145 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
*/
#include "rdma_core.h"
#include "uverbs.h"
#include <rdma/uverbs_std_types.h>
#include "restrack.h"
/*
 * Cleanup callback for a DMAH uobject.  Refuses destruction with
 * -EBUSY while any MR still holds a reference (usecnt), and propagates
 * the driver's dealloc_dmah() error so the uobject is not torn down
 * when the device-side free fails.
 */
static int uverbs_free_dmah(struct ib_uobject *uobject,
			    enum rdma_remove_reason why,
			    struct uverbs_attr_bundle *attrs)
{
	struct ib_dmah *dmah = uobject->object;
	int ret;

	/* MRs registered with this handle bump usecnt; they must go first */
	if (atomic_read(&dmah->usecnt))
		return -EBUSY;

	ret = dmah->device->ops.dealloc_dmah(dmah, attrs);
	if (ret)
		return ret;

	/* Drop from resource tracking before freeing the memory */
	rdma_restrack_del(&dmah->res);
	kfree(dmah);
	return 0;
}
/*
 * UVERBS_METHOD_DMAH_ALLOC - allocate a DMA handle (DMAH) object.
 *
 * Each optional attribute that is present fills the matching ib_dmah
 * field and sets its bit in valid_fields: CPU_ID, TPH_MEM_TYPE and PH.
 * The driver's alloc_dmah() op performs the device-specific setup.
 */
static int UVERBS_HANDLER(UVERBS_METHOD_DMAH_ALLOC)(
	struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj =
		uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DMAH_HANDLE)
			->obj_attr.uobject;
	struct ib_device *ib_dev = attrs->context->device;
	struct ib_dmah *dmah;
	int ret;

	dmah = rdma_zalloc_drv_obj(ib_dev, ib_dmah);
	if (!dmah)
		return -ENOMEM;

	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_CPU_ID)) {
		ret = uverbs_copy_from(&dmah->cpu_id, attrs,
				       UVERBS_ATTR_ALLOC_DMAH_CPU_ID);
		if (ret)
			goto err;

		/*
		 * Only accept a CPU the caller is itself allowed to run
		 * on; anything else is a permission error.
		 */
		if (!cpumask_test_cpu(dmah->cpu_id, current->cpus_ptr)) {
			ret = -EPERM;
			goto err;
		}
		dmah->valid_fields |= BIT(IB_DMAH_CPU_ID_EXISTS);
	}

	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE)) {
		/* Enum attribute: the selected enum id is the memory type */
		dmah->mem_type = uverbs_attr_get_enum_id(attrs,
					UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE);
		dmah->valid_fields |= BIT(IB_DMAH_MEM_TYPE_EXISTS);
	}

	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_PH)) {
		ret = uverbs_copy_from(&dmah->ph, attrs,
				       UVERBS_ATTR_ALLOC_DMAH_PH);
		if (ret)
			goto err;

		/* Per PCIe spec 6.2-1.0, only the lowest two bits are applicable */
		if (dmah->ph & 0xFC) {
			ret = -EINVAL;
			goto err;
		}
		dmah->valid_fields |= BIT(IB_DMAH_PH_EXISTS);
	}

	dmah->device = ib_dev;
	dmah->uobject = uobj;
	atomic_set(&dmah->usecnt, 0);

	/* Register with restrack before the driver call; undo on failure */
	rdma_restrack_new(&dmah->res, RDMA_RESTRACK_DMAH);
	rdma_restrack_set_name(&dmah->res, NULL);
	ret = ib_dev->ops.alloc_dmah(dmah, attrs);
	if (ret) {
		rdma_restrack_put(&dmah->res);
		goto err;
	}

	uobj->object = dmah;
	rdma_restrack_add(&dmah->res);
	uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_ALLOC_DMAH_HANDLE);
	return 0;
err:
	kfree(dmah);
	return ret;
}
/*
 * Enum attribute spec for the TPH memory type; both variants are pure
 * tags (no payload).  VM/PM presumably map to the PCIe TPH volatile vs
 * persistent memory types — confirm against the TPH spec.
 */
static const struct uverbs_attr_spec uverbs_dmah_mem_type[] = {
	[TPH_MEM_TYPE_VM] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
	[TPH_MEM_TYPE_PM] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
};
/* DMAH_ALLOC: mandatory new handle plus optional CPU_ID/MEM_TYPE/PH attrs */
DECLARE_UVERBS_NAMED_METHOD(
	UVERBS_METHOD_DMAH_ALLOC,
	UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DMAH_HANDLE,
			UVERBS_OBJECT_DMAH,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_CPU_ID,
			   UVERBS_ATTR_TYPE(u32),
			   UA_OPTIONAL),
	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE,
			    uverbs_dmah_mem_type,
			    UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_PH,
			   UVERBS_ATTR_TYPE(u8),
			   UA_OPTIONAL));

/* DMAH_FREE destroys the uobject; cleanup goes through uverbs_free_dmah() */
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
	UVERBS_METHOD_DMAH_FREE,
	UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DMA_HANDLE,
			UVERBS_OBJECT_DMAH,
			UVERBS_ACCESS_DESTROY,
			UA_MANDATORY));

DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DMAH,
			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_dmah),
			    &UVERBS_METHOD(UVERBS_METHOD_DMAH_ALLOC),
			    &UVERBS_METHOD(UVERBS_METHOD_DMAH_FREE));

/*
 * Chain the DMAH object into the core uAPI; it is only exposed when the
 * driver provides both alloc_dmah and dealloc_dmah ops.
 */
const struct uapi_definition uverbs_def_obj_dmah[] = {
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DMAH,
				      UAPI_DEF_OBJ_NEEDS_FN(dealloc_dmah),
				      UAPI_DEF_OBJ_NEEDS_FN(alloc_dmah)),
	{}
};

View File

@@ -238,7 +238,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)(
return ret;
mr = pd->device->ops.reg_user_mr_dmabuf(pd, offset, length, iova, fd,
access_flags,
access_flags, NULL,
attrs);
if (IS_ERR(mr))
return PTR_ERR(mr);
@@ -266,6 +266,135 @@ static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)(
return ret;
}
/*
 * UVERBS_METHOD_REG_MR - register a user MR via the ioctl uAPI.
 *
 * Two mutually exclusive input forms:
 *   - VA-based:     ADDR attribute (no FD, no FD_OFFSET)
 *   - dmabuf-based: FD + FD_OFFSET attributes (no ADDR)
 * An optional DMA_HANDLE (DMAH) may be attached in either form.
 */
static int UVERBS_HANDLER(UVERBS_METHOD_REG_MR)(
	struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj =
		uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_MR_HANDLE);
	struct ib_pd *pd =
		uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_MR_PD_HANDLE);
	/* Default for the VA path; narrowed below for the dmabuf path */
	u32 valid_access_flags = IB_ACCESS_SUPPORTED;
	u64 length, iova, fd_offset = 0, addr = 0;
	struct ib_device *ib_dev = pd->device;
	struct ib_dmah *dmah = NULL;
	bool has_fd_offset = false;
	bool has_addr = false;
	bool has_fd = false;
	u32 access_flags;
	struct ib_mr *mr;
	int fd;
	int ret;

	ret = uverbs_copy_from(&iova, attrs, UVERBS_ATTR_REG_MR_IOVA);
	if (ret)
		return ret;

	ret = uverbs_copy_from(&length, attrs, UVERBS_ATTR_REG_MR_LENGTH);
	if (ret)
		return ret;

	/* Record which of the optional, mode-selecting attrs were given */
	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_ADDR)) {
		ret = uverbs_copy_from(&addr, attrs,
				       UVERBS_ATTR_REG_MR_ADDR);
		if (ret)
			return ret;
		has_addr = true;
	}

	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_FD_OFFSET)) {
		ret = uverbs_copy_from(&fd_offset, attrs,
				       UVERBS_ATTR_REG_MR_FD_OFFSET);
		if (ret)
			return ret;
		has_fd_offset = true;
	}

	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_FD)) {
		ret = uverbs_get_raw_fd(&fd, attrs,
					UVERBS_ATTR_REG_MR_FD);
		if (ret)
			return ret;
		has_fd = true;
	}

	if (has_fd) {
		if (!ib_dev->ops.reg_user_mr_dmabuf)
			return -EOPNOTSUPP;

		/* FD requires offset and can't come with addr */
		if (!has_fd_offset || has_addr)
			return  -EINVAL;

		/* iova and the dmabuf offset must share the in-page offset */
		if ((fd_offset & ~PAGE_MASK) != (iova & ~PAGE_MASK))
			return -EINVAL;

		valid_access_flags = IB_ACCESS_LOCAL_WRITE |
				     IB_ACCESS_REMOTE_READ |
				     IB_ACCESS_REMOTE_WRITE |
				     IB_ACCESS_REMOTE_ATOMIC |
				     IB_ACCESS_RELAXED_ORDERING;
	} else {
		/* VA path: addr is mandatory and fd_offset is forbidden */
		if (!has_addr || has_fd_offset)
			return -EINVAL;

		if ((addr & ~PAGE_MASK) != (iova & ~PAGE_MASK))
			return -EINVAL;
	}

	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_DMA_HANDLE)) {
		dmah = uverbs_attr_get_obj(attrs,
					   UVERBS_ATTR_REG_MR_DMA_HANDLE);
		if (IS_ERR(dmah))
			return PTR_ERR(dmah);
	}

	ret = uverbs_get_flags32(&access_flags, attrs,
				 UVERBS_ATTR_REG_MR_ACCESS_FLAGS,
				 valid_access_flags);
	if (ret)
		return ret;

	ret = ib_check_mr_access(ib_dev, access_flags);
	if (ret)
		return ret;

	/*
	 * NOTE(review): the VA path passes NULL driver udata here, unlike
	 * the dmabuf path which passes attrs — confirm this is intentional.
	 */
	if (has_fd)
		mr = pd->device->ops.reg_user_mr_dmabuf(pd, fd_offset, length,
							iova, fd, access_flags,
							dmah, attrs);
	else
		mr = pd->device->ops.reg_user_mr(pd, addr, length, iova,
						 access_flags, dmah, NULL);

	if (IS_ERR(mr))
		return PTR_ERR(mr);

	mr->device = pd->device;
	mr->pd = pd;
	mr->type = IB_MR_TYPE_USER;
	mr->uobject = uobj;
	atomic_inc(&pd->usecnt);
	if (dmah) {
		/* Keep the DMAH alive until the MR is deregistered */
		mr->dmah = dmah;
		atomic_inc(&dmah->usecnt);
	}
	rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
	rdma_restrack_set_name(&mr->res, NULL);
	rdma_restrack_add(&mr->res);
	uobj->object = mr;
	uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_MR_HANDLE);

	ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_MR_RESP_LKEY,
			     &mr->lkey, sizeof(mr->lkey));
	if (ret)
		return ret;

	ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_MR_RESP_RKEY,
			     &mr->rkey, sizeof(mr->rkey));
	return ret;
}
DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_ADVISE_MR,
UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE,
@@ -362,6 +491,44 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(u32),
UA_MANDATORY));
/*
 * Attribute layout for UVERBS_METHOD_REG_MR.  ADDR vs FD/FD_OFFSET are
 * mutually exclusive (enforced in the handler); DMA_HANDLE is optional.
 */
DECLARE_UVERBS_NAMED_METHOD(
	UVERBS_METHOD_REG_MR,
	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_MR_HANDLE,
			UVERBS_OBJECT_MR,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_MR_PD_HANDLE,
			UVERBS_OBJECT_PD,
			UVERBS_ACCESS_READ,
			UA_MANDATORY),
	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_MR_DMA_HANDLE,
			UVERBS_OBJECT_DMAH,
			UVERBS_ACCESS_READ,
			UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_IOVA,
			   UVERBS_ATTR_TYPE(u64),
			   UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_LENGTH,
			   UVERBS_ATTR_TYPE(u64),
			   UA_MANDATORY),
	UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_MR_ACCESS_FLAGS,
			     enum ib_access_flags,
			     UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_ADDR,
			   UVERBS_ATTR_TYPE(u64),
			   UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_FD_OFFSET,
			   UVERBS_ATTR_TYPE(u64),
			   UA_OPTIONAL),
	UVERBS_ATTR_RAW_FD(UVERBS_ATTR_REG_MR_FD,
			   UA_OPTIONAL),
	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_MR_RESP_LKEY,
			    UVERBS_ATTR_TYPE(u32),
			    UA_MANDATORY),
	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_MR_RESP_RKEY,
			    UVERBS_ATTR_TYPE(u32),
			    UA_MANDATORY));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_MR_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE,
@@ -376,7 +543,8 @@ DECLARE_UVERBS_NAMED_OBJECT(
&UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG),
&UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY),
&UVERBS_METHOD(UVERBS_METHOD_QUERY_MR),
&UVERBS_METHOD(UVERBS_METHOD_REG_DMABUF_MR));
&UVERBS_METHOD(UVERBS_METHOD_REG_DMABUF_MR),
&UVERBS_METHOD(UVERBS_METHOD_REG_MR));
const struct uapi_definition uverbs_def_obj_mr[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,

View File

@@ -133,7 +133,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)(
device = xrcd->device;
break;
case IB_UVERBS_QPT_RAW_PACKET:
if (!capable(CAP_NET_RAW))
if (!rdma_uattrs_has_raw_cap(attrs))
return -EPERM;
fallthrough;
case IB_UVERBS_QPT_RC:

View File

@@ -631,6 +631,7 @@ static const struct uapi_definition uverbs_core_api[] = {
UAPI_DEF_CHAIN(uverbs_def_obj_cq),
UAPI_DEF_CHAIN(uverbs_def_obj_device),
UAPI_DEF_CHAIN(uverbs_def_obj_dm),
UAPI_DEF_CHAIN(uverbs_def_obj_dmah),
UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
UAPI_DEF_CHAIN(uverbs_def_obj_intf),
UAPI_DEF_CHAIN(uverbs_def_obj_mr),

View File

@@ -2223,7 +2223,7 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr,
access_flags, NULL);
access_flags, NULL, NULL);
if (IS_ERR(mr))
return mr;
@@ -2262,6 +2262,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
{
struct ib_pd *pd = mr->pd;
struct ib_dm *dm = mr->dm;
struct ib_dmah *dmah = mr->dmah;
struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
int ret;
@@ -2272,6 +2273,8 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
atomic_dec(&pd->usecnt);
if (dm)
atomic_dec(&dm->usecnt);
if (dmah)
atomic_dec(&dmah->usecnt);
kfree(sig_attrs);
}