Files
linux/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
Michal Kubiak 93f53db9f9 ice: switch to Page Pool
This patch completes the transition of the ice driver to use the Page Pool
and libeth APIs, following the same direction as commit 5fa4caff59
("iavf: switch to Page Pool"). With the legacy page splitting and recycling
logic already removed, the driver is now in a clean state to adopt the
modern memory model.

The Page Pool integration simplifies buffer management by offloading
DMA mapping and recycling to the core infrastructure. This eliminates
the need for driver-specific handling of headroom, buffer sizing, and
page order. The libeth helper is used for CPU-side processing, while
DMA-for-device is handled by the Page Pool core.

Additionally, this patch extends the conversion to cover XDP support.
The driver now uses libeth_xdp helpers for Rx buffer processing,
and optimizes XDP_TX by skipping per-frame DMA mapping. Instead, all
buffers are mapped as bi-directional up front, leveraging Page Pool's
lifecycle management. This significantly reduces overhead in virtualized
environments.

Performance observations:
- In typical scenarios (netperf, XDP_PASS, XDP_DROP), performance remains
  on par with the previous implementation.
- In XDP_TX mode:
  * With IOMMU enabled, performance improves dramatically - over 5x
    increase - due to reduced DMA mapping overhead and better memory reuse.
  * With IOMMU disabled, performance remains comparable to the previous
    implementation, with no significant changes observed.
- In XDP_DROP mode:
  * For small MTUs (where multiple buffers can be allocated on a single
    memory page), a performance drop of approximately 20% is observed.
    According to 'perf top' analysis, the bottleneck is caused by atomic
    reference counter increments in the Page Pool.
  * For normal MTUs (where only one buffer can be allocated within a
    single memory page), performance remains comparable to baseline
    levels.

This change is also a step toward a more modular and unified XDP
implementation across Intel Ethernet drivers, aligning with ongoing
efforts to consolidate and streamline feature support.

Suggested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Suggested-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Michal Kubiak <michal.kubiak@intel.com>
Tested-by: Alexander Nowlin <alexander.nowlin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2025-10-29 13:55:16 -07:00

139 lines
4.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019, Intel Corporation. */
#ifndef _ICE_TXRX_LIB_H_
#define _ICE_TXRX_LIB_H_
#include "ice.h"
/**
 * ice_test_staterr - check Rx descriptor status/error bits against a mask
 * @status_err_n: little-endian status_error0 or status_error1 descriptor word
 * @stat_err_bits: CPU-order mask of the bits to look for
 *
 * The mask is converted to little-endian and ANDed with the raw descriptor
 * word, so the descriptor field itself is never byte-swapped. The result is
 * only meaningful as a boolean.
 *
 * Return: true if at least one bit of @stat_err_bits is set in @status_err_n,
 * false otherwise.
 */
static inline bool
ice_test_staterr(__le16 status_err_n, const u16 stat_err_bits)
{
	const __le16 le_mask = cpu_to_le16(stat_err_bits);

	return (status_err_n & le_mask) != 0;
}
/* status_error0 bit that ice_is_non_eop() tests to detect the last buffer */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)

/**
 * ice_is_non_eop - tell whether a descriptor belongs to a non-EOP buffer
 * @rx_ring: Rx ring being processed
 * @rx_desc: Rx descriptor for the current buffer
 *
 * Tests the ICE_RXD_EOF bit in the descriptor's status_error0 word. When the
 * bit is clear, the ring's non_eop_descs statistic is incremented.
 *
 * Return: false if the buffer is the last one of the frame (the expected
 * case), true if more buffers follow.
 */
static inline bool
ice_is_non_eop(const struct ice_rx_ring *rx_ring,
	       const union ice_32b_rx_flex_desc *rx_desc)
{
	/* anything other than the last buffer is the unlikely path */
	if (!likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF))) {
		rx_ring->ring_stats->rx_stats.non_eop_descs++;
		return true;
	}

	/* last buffer: nothing else to do */
	return false;
}
/**
 * ice_build_ctob - pack Tx data descriptor fields into one 64-bit word
 * @td_cmd: command bits, placed at ICE_TXD_QW1_CMD_S
 * @td_offset: offset bits, placed at ICE_TXD_QW1_OFFSET_S
 * @size: buffer size, placed at ICE_TXD_QW1_TX_BUF_SZ_S
 * @td_tag: L2 tag, placed at ICE_TXD_QW1_L2TAG1_S
 *
 * The fields are shifted to their positions and ORed together with
 * ICE_TX_DESC_DTYPE_DATA. No masking is applied, so callers must pass values
 * that fit their fields.
 *
 * Return: the combined value converted to little-endian.
 */
static inline __le64
ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
{
	u64 qword = ICE_TX_DESC_DTYPE_DATA;

	qword |= td_cmd << ICE_TXD_QW1_CMD_S;
	qword |= td_offset << ICE_TXD_QW1_OFFSET_S;
	/* widen before shifting so the size bits are not lost */
	qword |= (u64)size << ICE_TXD_QW1_TX_BUF_SZ_S;
	qword |= td_tag << ICE_TXD_QW1_L2TAG1_S;

	return cpu_to_le64(qword);
}
/**
 * ice_build_tstamp_desc - compose a Tx time stamp descriptor
 * @tx_desc: Tx LAN descriptor index, stored in the ICE_TXTIME_TX_DESC_IDX_M field
 * @tstamp: time stamp, stored in the ICE_TXTIME_STAMP_M field
 *
 * Return: the Tx time stamp descriptor in little-endian byte order
 */
static inline __le32
ice_build_tstamp_desc(u16 tx_desc, u32 tstamp)
{
	u32 ts_desc;

	ts_desc = FIELD_PREP(ICE_TXTIME_TX_DESC_IDX_M, tx_desc);
	ts_desc |= FIELD_PREP(ICE_TXTIME_STAMP_M, tstamp);

	return cpu_to_le32(ts_desc);
}
/**
 * ice_get_vlan_tci - extract the VLAN TCI from an Rx flex descriptor
 * @rx_desc: Rx 32b flex descriptor with RXDID=2
 *
 * Only one VLAN tag is stripped at a time by the OS and the current PF
 * implementation, so at most one of the l2tag* fields is expected to be
 * populated. l2tag1 is checked first (L2TAG1P bit in status_error0), then
 * l2tag2_2nd (L2TAG2P bit in status_error1).
 *
 * Return: the tag in CPU byte order, or 0 if no VLAN tag was found.
 */
static inline u16
ice_get_vlan_tci(const union ice_32b_rx_flex_desc *rx_desc)
{
	const u16 tag1_present = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
	const u16 tag2_present = BIT(ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S);
	u16 tci = 0;

	if (ice_test_staterr(rx_desc->wb.status_error0, tag1_present))
		tci = le16_to_cpu(rx_desc->wb.l2tag1);
	else if (ice_test_staterr(rx_desc->wb.status_error1, tag2_present))
		tci = le16_to_cpu(rx_desc->wb.l2tag2_2nd);

	return tci;
}
/**
* ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
* @xdp_ring: XDP Tx ring whose next_to_use index is written to its tail
*
* Issues a write memory barrier and then stores @xdp_ring->next_to_use to
* @xdp_ring->tail using writel_relaxed(). The barrier has to stay ahead of
* the tail write so that earlier memory writes complete before the hardware
* is told there are new descriptors to fetch.
*
* NOTE(review): presumably the caller has already filled the descriptors up
* to next_to_use before calling this - confirm against callers.
*/
static inline void ice_xdp_ring_update_tail(struct ice_tx_ring *xdp_ring)
{
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch.
*/
wmb();
/* the _relaxed store relies on the wmb() above for ordering against the
* earlier writes
*/
writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
}
/**
 * ice_set_rs_bit - set the RS bit on the last produced descriptor
 * @xdp_ring: XDP ring to produce the HW Tx descriptors on
 *
 * The last produced descriptor is the one right behind next_to_use; when
 * next_to_use is 0 that is the final entry of the ring (count - 1).
 *
 * Return: index of the descriptor that had the RS bit set
 */
static inline u32 ice_set_rs_bit(const struct ice_tx_ring *xdp_ring)
{
	struct ice_tx_desc *rs_desc;
	u32 rs_idx;

	/* step one slot back from next_to_use, wrapping at the ring start */
	if (xdp_ring->next_to_use)
		rs_idx = xdp_ring->next_to_use - 1;
	else
		rs_idx = xdp_ring->count - 1;

	rs_desc = ICE_TX_DESC(xdp_ring, rs_idx);
	rs_desc->cmd_type_offset_bsz |=
		cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);

	return rs_idx;
}
/* Out-of-line helpers: only declared here, their definitions are not part of
* this header (presumably ice_txrx_lib.c - confirm).
*/
void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, u32 first_idx);
int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring,
bool frame);
void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val);
void
ice_process_skb_fields(struct ice_rx_ring *rx_ring,
union ice_32b_rx_flex_desc *rx_desc,
struct sk_buff *skb);
void
ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tci);
#endif /* !_ICE_TXRX_LIB_H_ */