mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 14:53:58 -04:00
Add a netdev_dmabuf_binding struct which represents the dma-buf-to-netdevice binding. The netlink API will bind the dma-buf to rx queues on the netdevice. On the binding, the dma_buf_attach & dma_buf_map_attachment will occur. The entries in the sg_table from mapping will be inserted into a genpool to make it ready for allocation. The chunks in the genpool are owned by a dmabuf_chunk_owner struct which holds the dma-buf offset of the base of the chunk and the dma_addr of the chunk. Both are needed to use allocations that come from this chunk. We create a new type that represents an allocation from the genpool: net_iov. We setup the net_iov allocation size in the genpool to PAGE_SIZE for simplicity: to match the PAGE_SIZE normally allocated by the page pool and given to the drivers. The user can unbind the dmabuf from the netdevice by closing the netlink socket that established the binding. We do this so that the binding is automatically unbound even if the userspace process crashes. The binding and unbinding leaves an indicator in struct netdev_rx_queue that the given queue is bound, and the binding is actuated by resetting the rx queue using the queue API. The netdev_dmabuf_binding struct is refcounted, and releases its resources only when all the refs are released. Signed-off-by: Willem de Bruijn <willemb@google.com> Signed-off-by: Kaiyuan Zhang <kaiyuanz@google.com> Signed-off-by: Mina Almasry <almasrymina@google.com> Reviewed-by: Pavel Begunkov <asml.silence@gmail.com> # excluding netlink Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Jakub Kicinski <kuba@kernel.org> Link: https://patch.msgid.link/20240910171458.219195-4-almasrymina@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
680 lines
18 KiB
YAML
680 lines
18 KiB
YAML
# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
|
|
|
|
name: netdev
|
|
|
|
doc:
|
|
netdev configuration over generic netlink.
|
|
|
|
definitions:
|
|
-
|
|
type: flags
|
|
name: xdp-act
|
|
render-max: true
|
|
entries:
|
|
-
|
|
name: basic
|
|
doc:
|
|
XDP features set supported by all drivers
|
|
(XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
|
|
-
|
|
name: redirect
|
|
doc:
|
|
The netdev supports XDP_REDIRECT
|
|
-
|
|
name: ndo-xmit
|
|
doc:
|
|
This feature informs if netdev implements ndo_xdp_xmit callback.
|
|
-
|
|
name: xsk-zerocopy
|
|
doc:
|
|
This feature informs if netdev supports AF_XDP in zero copy mode.
|
|
-
|
|
name: hw-offload
|
|
doc:
|
|
This feature informs if netdev supports XDP hw offloading.
|
|
-
|
|
name: rx-sg
|
|
doc:
|
|
This feature informs if netdev implements non-linear XDP buffer
|
|
support in the driver napi callback.
|
|
-
|
|
name: ndo-xmit-sg
|
|
doc:
|
|
This feature informs if netdev implements non-linear XDP buffer
|
|
support in ndo_xdp_xmit callback.
|
|
-
|
|
type: flags
|
|
name: xdp-rx-metadata
|
|
entries:
|
|
-
|
|
name: timestamp
|
|
doc:
|
|
Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp().
|
|
-
|
|
name: hash
|
|
doc:
|
|
Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
|
|
-
|
|
name: vlan-tag
|
|
doc:
|
|
Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
|
|
-
|
|
type: flags
|
|
name: xsk-flags
|
|
entries:
|
|
-
|
|
name: tx-timestamp
|
|
doc:
|
|
HW timestamping egress packets is supported by the driver.
|
|
-
|
|
name: tx-checksum
|
|
doc:
|
|
L3 checksum HW offload is supported by the driver.
|
|
-
|
|
name: queue-type
|
|
type: enum
|
|
entries: [ rx, tx ]
|
|
-
|
|
name: qstats-scope
|
|
type: flags
|
|
entries: [ queue ]
|
|
|
|
attribute-sets:
|
|
-
|
|
name: dev
|
|
attributes:
|
|
-
|
|
name: ifindex
|
|
doc: netdev ifindex
|
|
type: u32
|
|
checks:
|
|
min: 1
|
|
-
|
|
name: pad
|
|
type: pad
|
|
-
|
|
name: xdp-features
|
|
doc: Bitmask of enabled xdp-features.
|
|
type: u64
|
|
enum: xdp-act
|
|
-
|
|
name: xdp-zc-max-segs
|
|
doc: max fragment count supported by ZC driver
|
|
type: u32
|
|
checks:
|
|
min: 1
|
|
-
|
|
name: xdp-rx-metadata-features
|
|
doc: Bitmask of supported XDP receive metadata features.
|
|
See Documentation/networking/xdp-rx-metadata.rst for more details.
|
|
type: u64
|
|
enum: xdp-rx-metadata
|
|
-
|
|
name: xsk-features
|
|
doc: Bitmask of enabled AF_XDP features.
|
|
type: u64
|
|
enum: xsk-flags
|
|
-
|
|
name: page-pool
|
|
attributes:
|
|
-
|
|
name: id
|
|
doc: Unique ID of a Page Pool instance.
|
|
type: uint
|
|
checks:
|
|
min: 1
|
|
max: u32-max
|
|
-
|
|
name: ifindex
|
|
doc: |
|
|
ifindex of the netdev to which the pool belongs.
|
|
May be reported as 0 if the page pool was allocated for a netdev
|
|
which got destroyed already (page pools may outlast their netdevs
|
|
because they wait for all memory to be returned).
|
|
type: u32
|
|
checks:
|
|
min: 1
|
|
max: s32-max
|
|
-
|
|
name: napi-id
|
|
doc: Id of NAPI using this Page Pool instance.
|
|
type: uint
|
|
checks:
|
|
min: 1
|
|
max: u32-max
|
|
-
|
|
name: inflight
|
|
type: uint
|
|
doc: |
|
|
Number of outstanding references to this page pool (allocated
|
|
but yet to be freed pages). Allocated pages may be held in
|
|
socket receive queues, driver receive ring, page pool recycling
|
|
ring, the page pool cache, etc.
|
|
-
|
|
name: inflight-mem
|
|
type: uint
|
|
doc: |
|
|
Amount of memory held by inflight pages.
|
|
-
|
|
name: detach-time
|
|
type: uint
|
|
doc: |
|
|
Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
|
|
the driver. Once detached Page Pool can no longer be used to
|
|
allocate memory.
|
|
Page Pools wait for all the memory allocated from them to be freed
|
|
before truly disappearing. "Detached" Page Pools cannot be
|
|
"re-attached", they are just waiting to disappear.
|
|
Attribute is absent if Page Pool has not been detached, and
|
|
can still be used to allocate new memory.
|
|
-
|
|
name: page-pool-info
|
|
subset-of: page-pool
|
|
attributes:
|
|
-
|
|
name: id
|
|
-
|
|
name: ifindex
|
|
-
|
|
name: page-pool-stats
|
|
doc: |
|
|
Page pool statistics, see docs for struct page_pool_stats
|
|
for information about individual statistics.
|
|
attributes:
|
|
-
|
|
name: info
|
|
doc: Page pool identifying information.
|
|
type: nest
|
|
nested-attributes: page-pool-info
|
|
-
|
|
name: alloc-fast
|
|
type: uint
|
|
value: 8 # reserve some attr ids in case we need more metadata later
|
|
-
|
|
name: alloc-slow
|
|
type: uint
|
|
-
|
|
name: alloc-slow-high-order
|
|
type: uint
|
|
-
|
|
name: alloc-empty
|
|
type: uint
|
|
-
|
|
name: alloc-refill
|
|
type: uint
|
|
-
|
|
name: alloc-waive
|
|
type: uint
|
|
-
|
|
name: recycle-cached
|
|
type: uint
|
|
-
|
|
name: recycle-cache-full
|
|
type: uint
|
|
-
|
|
name: recycle-ring
|
|
type: uint
|
|
-
|
|
name: recycle-ring-full
|
|
type: uint
|
|
-
|
|
name: recycle-released-refcnt
|
|
type: uint
|
|
|
|
-
|
|
name: napi
|
|
attributes:
|
|
-
|
|
name: ifindex
|
|
doc: ifindex of the netdevice to which NAPI instance belongs.
|
|
type: u32
|
|
checks:
|
|
min: 1
|
|
-
|
|
name: id
|
|
doc: ID of the NAPI instance.
|
|
type: u32
|
|
-
|
|
name: irq
|
|
doc: The associated interrupt vector number for the NAPI
|
|
type: u32
|
|
-
|
|
name: pid
|
|
doc: PID of the napi thread, if NAPI is configured to operate in
|
|
threaded mode. If NAPI is not in threaded mode (i.e. uses normal
|
|
softirq context), the attribute will be absent.
|
|
type: u32
|
|
-
|
|
name: queue
|
|
attributes:
|
|
-
|
|
name: id
|
|
doc: Queue index; most queue types are indexed like a C array, with
|
|
indexes starting at 0 and ending at queue count - 1. Queue indexes
|
|
are scoped to an interface and queue type.
|
|
type: u32
|
|
-
|
|
name: ifindex
|
|
doc: ifindex of the netdevice to which the queue belongs.
|
|
type: u32
|
|
checks:
|
|
min: 1
|
|
-
|
|
name: type
|
|
doc: Queue type as rx, tx. Each queue type defines a separate ID space.
|
|
type: u32
|
|
enum: queue-type
|
|
-
|
|
name: napi-id
|
|
doc: ID of the NAPI instance which services this queue.
|
|
type: u32
|
|
|
|
-
|
|
name: qstats
|
|
doc: |
|
|
Get device statistics, scoped to a device or a queue.
|
|
These statistics extend (and partially duplicate) statistics available
|
|
in struct rtnl_link_stats64.
|
|
Value of the `scope` attribute determines how statistics are
|
|
aggregated. When aggregated for the entire device the statistics
|
|
represent the total number of events since last explicit reset of
|
|
the device (i.e. not a reconfiguration like changing queue count).
|
|
When reported per-queue, however, the statistics may not add
|
|
up to the total number of events, will only be reported for currently
|
|
active objects, and will likely report the number of events since last
|
|
reconfiguration.
|
|
attributes:
|
|
-
|
|
name: ifindex
|
|
doc: ifindex of the netdevice to which stats belong.
|
|
type: u32
|
|
checks:
|
|
min: 1
|
|
-
|
|
name: queue-type
|
|
doc: Queue type as rx, tx, for queue-id.
|
|
type: u32
|
|
enum: queue-type
|
|
-
|
|
name: queue-id
|
|
doc: Queue ID, if stats are scoped to a single queue instance.
|
|
type: u32
|
|
-
|
|
name: scope
|
|
doc: |
|
|
What object type should be used to iterate over the stats.
|
|
type: uint
|
|
enum: qstats-scope
|
|
-
|
|
name: rx-packets
|
|
doc: |
|
|
Number of wire packets successfully received and passed to the stack.
|
|
For drivers supporting XDP, XDP is considered the first layer
|
|
of the stack, so packets consumed by XDP are still counted here.
|
|
type: uint
|
|
value: 8 # reserve some attr ids in case we need more metadata later
|
|
-
|
|
name: rx-bytes
|
|
doc: Successfully received bytes, see `rx-packets`.
|
|
type: uint
|
|
-
|
|
name: tx-packets
|
|
doc: |
|
|
Number of wire packets successfully sent. Packet is considered to be
|
|
successfully sent once it is in device memory (usually this means
|
|
the device has issued a DMA completion for the packet).
|
|
type: uint
|
|
-
|
|
name: tx-bytes
|
|
doc: Successfully sent bytes, see `tx-packets`.
|
|
type: uint
|
|
-
|
|
name: rx-alloc-fail
|
|
doc: |
|
|
Number of times skb or buffer allocation failed on the Rx datapath.
|
|
Allocation failure may or may not result in a packet drop, depending
|
|
on driver implementation and whether system recovers quickly.
|
|
type: uint
|
|
-
|
|
name: rx-hw-drops
|
|
doc: |
|
|
Number of all packets which entered the device, but never left it,
|
|
including but not limited to: packets dropped due to lack of buffer
|
|
space, processing errors, explicit or implicit policies and packet
|
|
filters.
|
|
type: uint
|
|
-
|
|
name: rx-hw-drop-overruns
|
|
doc: |
|
|
Number of packets dropped due to transient lack of resources, such as
|
|
buffer space, host descriptors etc.
|
|
type: uint
|
|
-
|
|
name: rx-csum-complete
|
|
doc: Number of packets that were marked as CHECKSUM_COMPLETE.
|
|
type: uint
|
|
-
|
|
name: rx-csum-unnecessary
|
|
doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
|
|
type: uint
|
|
-
|
|
name: rx-csum-none
|
|
doc: Number of packets that were not checksummed by device.
|
|
type: uint
|
|
-
|
|
name: rx-csum-bad
|
|
doc: |
|
|
Number of packets with bad checksum. The packets are not discarded,
|
|
but still delivered to the stack.
|
|
type: uint
|
|
-
|
|
name: rx-hw-gro-packets
|
|
doc: |
|
|
Number of packets that were coalesced from smaller packets by the device.
|
|
Counts only packets coalesced with the HW-GRO netdevice feature,
|
|
LRO-coalesced packets are not counted.
|
|
type: uint
|
|
-
|
|
name: rx-hw-gro-bytes
|
|
doc: See `rx-hw-gro-packets`.
|
|
type: uint
|
|
-
|
|
name: rx-hw-gro-wire-packets
|
|
doc: |
|
|
Number of packets that were coalesced to bigger packets with the HW-GRO
|
|
netdevice feature. LRO-coalesced packets are not counted.
|
|
type: uint
|
|
-
|
|
name: rx-hw-gro-wire-bytes
|
|
doc: See `rx-hw-gro-wire-packets`.
|
|
type: uint
|
|
-
|
|
name: rx-hw-drop-ratelimits
|
|
doc: |
|
|
Number of the packets dropped by the device due to the received
|
|
packets bitrate exceeding the device rate limit.
|
|
type: uint
|
|
-
|
|
name: tx-hw-drops
|
|
doc: |
|
|
Number of packets that arrived at the device but never left it,
|
|
encompassing packets dropped for reasons such as processing errors, as
|
|
well as those affected by explicitly defined policies and packet
|
|
filtering criteria.
|
|
type: uint
|
|
-
|
|
name: tx-hw-drop-errors
|
|
doc: Number of packets dropped because they were invalid or malformed.
|
|
type: uint
|
|
-
|
|
name: tx-csum-none
|
|
doc: |
|
|
Number of packets that did not require the device to calculate the
|
|
checksum.
|
|
type: uint
|
|
-
|
|
name: tx-needs-csum
|
|
doc: |
|
|
Number of packets that required the device to calculate the checksum.
|
|
type: uint
|
|
-
|
|
name: tx-hw-gso-packets
|
|
doc: |
|
|
Number of packets that necessitated segmentation into smaller packets
|
|
by the device.
|
|
type: uint
|
|
-
|
|
name: tx-hw-gso-bytes
|
|
doc: See `tx-hw-gso-packets`.
|
|
type: uint
|
|
-
|
|
name: tx-hw-gso-wire-packets
|
|
doc: |
|
|
Number of wire-sized packets generated by processing
|
|
`tx-hw-gso-packets`
|
|
type: uint
|
|
-
|
|
name: tx-hw-gso-wire-bytes
|
|
doc: See `tx-hw-gso-wire-packets`.
|
|
type: uint
|
|
-
|
|
name: tx-hw-drop-ratelimits
|
|
doc: |
|
|
Number of the packets dropped by the device due to the transmit
|
|
packets bitrate exceeding the device rate limit.
|
|
type: uint
|
|
-
|
|
name: tx-stop
|
|
doc: |
|
|
Number of times driver paused accepting new tx packets
|
|
from the stack to this queue, because the queue was full.
|
|
Note that if BQL is supported and enabled on the device
|
|
the networking stack will avoid queuing a lot of data at once.
|
|
type: uint
|
|
-
|
|
name: tx-wake
|
|
doc: |
|
|
Number of times driver re-started accepting send
|
|
requests to this queue from the stack.
|
|
type: uint
|
|
-
|
|
name: queue-id
|
|
subset-of: queue
|
|
attributes:
|
|
-
|
|
name: id
|
|
-
|
|
name: type
|
|
-
|
|
name: dmabuf
|
|
attributes:
|
|
-
|
|
name: ifindex
|
|
doc: netdev ifindex to bind the dmabuf to.
|
|
type: u32
|
|
checks:
|
|
min: 1
|
|
-
|
|
name: queues
|
|
doc: receive queues to bind the dmabuf to.
|
|
type: nest
|
|
nested-attributes: queue-id
|
|
multi-attr: true
|
|
-
|
|
name: fd
|
|
doc: dmabuf file descriptor to bind.
|
|
type: u32
|
|
-
|
|
name: id
|
|
doc: id of the dmabuf binding
|
|
type: u32
|
|
checks:
|
|
min: 1
|
|
|
|
operations:
|
|
list:
|
|
-
|
|
name: dev-get
|
|
doc: Get / dump information about a netdev.
|
|
attribute-set: dev
|
|
do:
|
|
request:
|
|
attributes:
|
|
- ifindex
|
|
reply: &dev-all
|
|
attributes:
|
|
- ifindex
|
|
- xdp-features
|
|
- xdp-zc-max-segs
|
|
- xdp-rx-metadata-features
|
|
- xsk-features
|
|
dump:
|
|
reply: *dev-all
|
|
-
|
|
name: dev-add-ntf
|
|
doc: Notification about device appearing.
|
|
notify: dev-get
|
|
mcgrp: mgmt
|
|
-
|
|
name: dev-del-ntf
|
|
doc: Notification about device disappearing.
|
|
notify: dev-get
|
|
mcgrp: mgmt
|
|
-
|
|
name: dev-change-ntf
|
|
doc: Notification about device configuration being changed.
|
|
notify: dev-get
|
|
mcgrp: mgmt
|
|
-
|
|
name: page-pool-get
|
|
doc: |
|
|
Get / dump information about Page Pools.
|
|
(Only Page Pools associated with a net_device can be listed.)
|
|
attribute-set: page-pool
|
|
do:
|
|
request:
|
|
attributes:
|
|
- id
|
|
reply: &pp-reply
|
|
attributes:
|
|
- id
|
|
- ifindex
|
|
- napi-id
|
|
- inflight
|
|
- inflight-mem
|
|
- detach-time
|
|
dump:
|
|
reply: *pp-reply
|
|
config-cond: page-pool
|
|
-
|
|
name: page-pool-add-ntf
|
|
doc: Notification about page pool appearing.
|
|
notify: page-pool-get
|
|
mcgrp: page-pool
|
|
config-cond: page-pool
|
|
-
|
|
name: page-pool-del-ntf
|
|
doc: Notification about page pool disappearing.
|
|
notify: page-pool-get
|
|
mcgrp: page-pool
|
|
config-cond: page-pool
|
|
-
|
|
name: page-pool-change-ntf
|
|
doc: Notification about page pool configuration being changed.
|
|
notify: page-pool-get
|
|
mcgrp: page-pool
|
|
config-cond: page-pool
|
|
-
|
|
name: page-pool-stats-get
|
|
doc: Get page pool statistics.
|
|
attribute-set: page-pool-stats
|
|
do:
|
|
request:
|
|
attributes:
|
|
- info
|
|
reply: &pp-stats-reply
|
|
attributes:
|
|
- info
|
|
- alloc-fast
|
|
- alloc-slow
|
|
- alloc-slow-high-order
|
|
- alloc-empty
|
|
- alloc-refill
|
|
- alloc-waive
|
|
- recycle-cached
|
|
- recycle-cache-full
|
|
- recycle-ring
|
|
- recycle-ring-full
|
|
- recycle-released-refcnt
|
|
dump:
|
|
reply: *pp-stats-reply
|
|
config-cond: page-pool-stats
|
|
-
|
|
name: queue-get
|
|
doc: Get queue information from the kernel.
|
|
Only configured queues will be reported (as opposed to all available
|
|
hardware queues).
|
|
attribute-set: queue
|
|
do:
|
|
request:
|
|
attributes:
|
|
- ifindex
|
|
- type
|
|
- id
|
|
reply: &queue-get-op
|
|
attributes:
|
|
- id
|
|
- type
|
|
- napi-id
|
|
- ifindex
|
|
dump:
|
|
request:
|
|
attributes:
|
|
- ifindex
|
|
reply: *queue-get-op
|
|
-
|
|
name: napi-get
|
|
doc: Get information about NAPI instances configured on the system.
|
|
attribute-set: napi
|
|
do:
|
|
request:
|
|
attributes:
|
|
- id
|
|
reply: &napi-get-op
|
|
attributes:
|
|
- id
|
|
- ifindex
|
|
- irq
|
|
- pid
|
|
dump:
|
|
request:
|
|
attributes:
|
|
- ifindex
|
|
reply: *napi-get-op
|
|
-
|
|
name: qstats-get
|
|
doc: |
|
|
Get / dump fine grained statistics. Which statistics are reported
|
|
depends on the device and the driver, and whether the driver stores
|
|
software counters per-queue.
|
|
attribute-set: qstats
|
|
dump:
|
|
request:
|
|
attributes:
|
|
- ifindex
|
|
- scope
|
|
reply:
|
|
attributes:
|
|
- ifindex
|
|
- queue-type
|
|
- queue-id
|
|
- rx-packets
|
|
- rx-bytes
|
|
- tx-packets
|
|
- tx-bytes
|
|
-
|
|
name: bind-rx
|
|
doc: Bind dmabuf to netdev
|
|
attribute-set: dmabuf
|
|
flags: [ admin-perm ]
|
|
do:
|
|
request:
|
|
attributes:
|
|
- ifindex
|
|
- fd
|
|
- queues
|
|
reply:
|
|
attributes:
|
|
- id
|
|
|
|
kernel-family:
|
|
headers: [ "linux/list.h"]
|
|
sock-priv: struct list_head
|
|
|
|
mcast-groups:
|
|
list:
|
|
-
|
|
name: mgmt
|
|
-
|
|
name: page-pool
|