Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:

 - Various minor code cleanups and fixes for hns, iser, cxgb4, hfi1,
   rxe, erdma, mana_ib

 - Prefetch supprot for rxe ODP

 - Remove memory window support from hns as new device FW is no longer
   support it

 - Remove qib, it is very old and obsolete now, Cornelis wishes to
   restructure the hfi1/qib shared layer

 - Fix a race in destroying CQs where we can still end up with work
   running because the work is cancled before the driver stops
   triggering it

 - Improve interaction with namespaces:
     * Follow the devlink namespace for newly spawned RDMA devices
     * Create iopoib net devces in the parent IB device's namespace
     * Allow CAP_NET_RAW checks to pass in user namespaces

 - A new flow control scheme for IB MADs to try and avoid queue
   overflows in the network

 - Fix 2G message sizes in bnxt_re

 - Optimize mkey layout for mlx5 DMABUF

 - New "DMA Handle" concept to allow controlling PCI TPH and steering
   tags

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (71 commits)
  RDMA/siw: Change maintainer email address
  RDMA/mana_ib: add support of multiple ports
  RDMA/mlx5: Refactor optional counters steering code
  RDMA/mlx5: Add DMAH support for reg_user_mr/reg_user_dmabuf_mr
  IB: Extend UVERBS_METHOD_REG_MR to get DMAH
  RDMA/mlx5: Add DMAH object support
  RDMA/core: Introduce a DMAH object and its alloc/free APIs
  IB/core: Add UVERBS_METHOD_REG_MR on the MR object
  net/mlx5: Add support for device steering tag
  net/mlx5: Expose IFC bits for TPH
  PCI/TPH: Expose pcie_tph_get_st_table_size()
  RDMA/mlx5: Fix incorrect MKEY masking
  RDMA/mlx5: Fix returned type from _mlx5r_umr_zap_mkey()
  RDMA/mlx5: remove redundant check on err on return expression
  RDMA/mana_ib: add additional port counters
  RDMA/mana_ib: Fix DSCP value in modify QP
  RDMA/efa: Add CQ with external memory support
  RDMA/core: Add umem "is_contiguous" and "start_dma_addr" helpers
  RDMA/uverbs: Add a common way to create CQ with umem
  RDMA/mlx5: Optimize DMABUF mkey page size
  ...
This commit is contained in:
Linus Torvalds
2025-07-31 12:19:55 -07:00
153 changed files with 2867 additions and 49165 deletions

View File

@@ -167,5 +167,10 @@ mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_
#
mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o
#
# TPH support
#
mlx5_core-$(CONFIG_PCIE_TPH) += lib/st.o
obj-$(CONFIG_MLX5_DPLL) += mlx5_dpll.o
mlx5_dpll-y := dpll.o

View File

@@ -45,11 +45,6 @@ int mlx5_crdump_enable(struct mlx5_core_dev *dev);
void mlx5_crdump_disable(struct mlx5_core_dev *dev);
int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data);
static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev)
{
return devlink_net(priv_to_devlink(dev));
}
static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev)
{
return mdev->mlx5e_res.uplink_netdev;

View File

@@ -0,0 +1,164 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
*/
#include <linux/mlx5/driver.h>
#include <linux/mlx5/device.h>
#include "mlx5_core.h"
#include "lib/mlx5.h"
struct mlx5_st_idx_data {
refcount_t usecount;
u16 tag;
};
struct mlx5_st {
/* serialize access upon alloc/free flows */
struct mutex lock;
struct xa_limit index_limit;
struct xarray idx_xa; /* key == index, value == struct mlx5_st_idx_data */
};
struct mlx5_st *mlx5_st_create(struct mlx5_core_dev *dev)
{
struct pci_dev *pdev = dev->pdev;
struct mlx5_st *st;
u16 num_entries;
int ret;
if (!MLX5_CAP_GEN(dev, mkey_pcie_tph))
return NULL;
#ifdef CONFIG_MLX5_SF
if (mlx5_core_is_sf(dev))
return dev->priv.parent_mdev->st;
#endif
/* Checking whether the device is capable */
if (!pdev->tph_cap)
return NULL;
num_entries = pcie_tph_get_st_table_size(pdev);
/* We need a reserved entry for non TPH cases */
if (num_entries < 2)
return NULL;
/* The OS doesn't support ST */
ret = pcie_enable_tph(pdev, PCI_TPH_ST_DS_MODE);
if (ret)
return NULL;
st = kzalloc(sizeof(*st), GFP_KERNEL);
if (!st)
goto end;
mutex_init(&st->lock);
xa_init_flags(&st->idx_xa, XA_FLAGS_ALLOC);
/* entry 0 is reserved for non TPH cases */
st->index_limit.min = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX + 1;
st->index_limit.max = num_entries - 1;
return st;
end:
pcie_disable_tph(dev->pdev);
return NULL;
}
void mlx5_st_destroy(struct mlx5_core_dev *dev)
{
struct mlx5_st *st = dev->st;
if (mlx5_core_is_sf(dev) || !st)
return;
pcie_disable_tph(dev->pdev);
WARN_ON_ONCE(!xa_empty(&st->idx_xa));
kfree(st);
}
int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type,
unsigned int cpu_uid, u16 *st_index)
{
struct mlx5_st_idx_data *idx_data;
struct mlx5_st *st = dev->st;
unsigned long index;
u32 xa_id;
u16 tag;
int ret;
if (!st)
return -EOPNOTSUPP;
ret = pcie_tph_get_cpu_st(dev->pdev, mem_type, cpu_uid, &tag);
if (ret)
return ret;
mutex_lock(&st->lock);
xa_for_each(&st->idx_xa, index, idx_data) {
if (tag == idx_data->tag) {
refcount_inc(&idx_data->usecount);
*st_index = index;
goto end;
}
}
idx_data = kzalloc(sizeof(*idx_data), GFP_KERNEL);
if (!idx_data) {
ret = -ENOMEM;
goto end;
}
refcount_set(&idx_data->usecount, 1);
idx_data->tag = tag;
ret = xa_alloc(&st->idx_xa, &xa_id, idx_data, st->index_limit, GFP_KERNEL);
if (ret)
goto clean_idx_data;
ret = pcie_tph_set_st_entry(dev->pdev, xa_id, tag);
if (ret)
goto clean_idx_xa;
*st_index = xa_id;
goto end;
clean_idx_xa:
xa_erase(&st->idx_xa, xa_id);
clean_idx_data:
kfree(idx_data);
end:
mutex_unlock(&st->lock);
return ret;
}
EXPORT_SYMBOL_GPL(mlx5_st_alloc_index);
int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index)
{
struct mlx5_st_idx_data *idx_data;
struct mlx5_st *st = dev->st;
int ret = 0;
if (!st)
return -EOPNOTSUPP;
mutex_lock(&st->lock);
idx_data = xa_load(&st->idx_xa, st_index);
if (WARN_ON_ONCE(!idx_data)) {
ret = -EINVAL;
goto end;
}
if (refcount_dec_and_test(&idx_data->usecount)) {
xa_erase(&st->idx_xa, st_index);
/* We leave PCI config space as was before, no mkey will refer to it */
}
end:
mutex_unlock(&st->lock);
return ret;
}
EXPORT_SYMBOL_GPL(mlx5_st_dealloc_index);

View File

@@ -1102,6 +1102,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
}
dev->dm = mlx5_dm_create(dev);
dev->st = mlx5_st_create(dev);
dev->tracer = mlx5_fw_tracer_create(dev);
dev->hv_vhca = mlx5_hv_vhca_create(dev);
dev->rsc_dump = mlx5_rsc_dump_create(dev);
@@ -1150,6 +1151,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_rsc_dump_destroy(dev);
mlx5_hv_vhca_destroy(dev->hv_vhca);
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_st_destroy(dev);
mlx5_dm_cleanup(dev);
mlx5_fs_core_free(dev);
mlx5_sf_table_cleanup(dev);

View File

@@ -300,6 +300,15 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev);
void mlx5_dm_cleanup(struct mlx5_core_dev *dev);
#ifdef CONFIG_PCIE_TPH
struct mlx5_st *mlx5_st_create(struct mlx5_core_dev *dev);
void mlx5_st_destroy(struct mlx5_core_dev *dev);
#else
static inline struct mlx5_st *
mlx5_st_create(struct mlx5_core_dev *dev) { return NULL; }
static inline void mlx5_st_destroy(struct mlx5_core_dev *dev) { return; }
#endif
void mlx5_toggle_port_link(struct mlx5_core_dev *dev);
int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
enum mlx5_port_status status);