Files
linux/net/vmw_vsock/vsock_bpf.c
Eric Dumazet 8341c989ac net: remove addr_len argument of recvmsg() handlers
Use msg->msg_namelen as a placeholder instead of a
temporary variable, notably in inet[6]_recvmsg().

This removes stack canaries and allows tail-calls.

$ scripts/bloat-o-meter -t vmlinux.old vmlinux
add/remove: 0/0 grow/shrink: 2/19 up/down: 26/-532 (-506)
Function                                     old     new   delta
rawv6_recvmsg                                744     767     +23
vsock_dgram_recvmsg                           55      58      +3
vsock_connectible_recvmsg                     50      47      -3
unix_stream_recvmsg                          161     158      -3
unix_seqpacket_recvmsg                        62      59      -3
unix_dgram_recvmsg                            42      39      -3
tcp_recvmsg                                  546     543      -3
mptcp_recvmsg                               1568    1565      -3
ping_recvmsg                                 806     800      -6
tcp_bpf_recvmsg_parser                       983     974      -9
ip_recv_error                                588     576     -12
ipv6_recv_rxpmtu                             442     428     -14
udp_recvmsg                                 1243    1224     -19
ipv6_recv_error                             1046    1024     -22
udpv6_recvmsg                               1487    1461     -26
raw_recvmsg                                  465     437     -28
udp_bpf_recvmsg                             1027     984     -43
sock_common_recvmsg                          103      27     -76
inet_recvmsg                                 257     175     -82
inet6_recvmsg                                257     175     -82
tcp_bpf_recvmsg                              663     568     -95
Total: Before=25143834, After=25143328, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20260227151120.1346573-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-03-02 18:17:17 -08:00

176 lines
4.0 KiB
C

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Bobby Eshleman <bobby.eshleman@bytedance.com>
*
* Based off of net/unix/unix_bpf.c
*/
#include <linux/bpf.h>
#include <linux/module.h>
#include <linux/skmsg.h>
#include <linux/socket.h>
#include <linux/wait.h>
#include <net/af_vsock.h>
#include <net/sock.h>
/*
 * Evaluates to non-zero when any of the following is non-empty:
 *   - the socket's sk_receive_queue,
 *   - the psock's ingress_skb queue,
 *   - the psock's ingress_msg list.
 *
 * The checks short-circuit in that order. The psock argument appears twice
 * in the expansion, so callers should pass a side-effect-free expression.
 */
#define vsock_sk_has_data(sk_arg, psock_arg)				\
	(!skb_queue_empty(&(sk_arg)->sk_receive_queue) ||		\
	 !skb_queue_empty(&(psock_arg)->ingress_skb) ||		\
	 !list_empty(&(psock_arg)->ingress_msg))
/* Base proto that vsock_bpf_prot was most recently rebuilt from. It is
 * written under vsock_prot_lock and read without the lock (acquire load) in
 * vsock_bpf_check_needs_rebuild().
 */
static struct proto *vsock_prot_saved __read_mostly;
/* Serializes rebuilds of vsock_bpf_prot and updates of vsock_prot_saved. */
static DEFINE_SPINLOCK(vsock_prot_lock);
/* Proto carrying the close/recvmsg/sock_is_readable overrides; installed on
 * sockets by vsock_bpf_update_proto().
 */
static struct proto vsock_bpf_prot;
/*
 * Report whether there is anything for a reader of @sk to consume.
 *
 * Returns true when vsock_connectible_has_data() yields a positive value for
 * the socket; otherwise falls back to checking the socket receive queue and
 * the @psock ingress queues via vsock_sk_has_data(). A zero or negative
 * result from vsock_connectible_has_data() is not treated as data.
 */
static bool vsock_has_data(struct sock *sk, struct sk_psock *psock)
{
	if (vsock_connectible_has_data(vsock_sk(sk)) > 0)
		return true;

	return vsock_sk_has_data(sk, psock);
}
static bool vsock_msg_wait_data(struct sock *sk, struct sk_psock *psock, long timeo)
{
bool ret;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
if (sk->sk_shutdown & RCV_SHUTDOWN)
return true;
if (!timeo)
return false;
add_wait_queue(sk_sleep(sk), &wait);
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
ret = vsock_has_data(sk, psock);
if (!ret) {
wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
ret = vsock_has_data(sk, psock);
}
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
remove_wait_queue(sk_sleep(sk), &wait);
return ret;
}
/*
 * Dispatch a receive to the regular (non-BPF) vsock receive routine that
 * matches the socket type.
 *
 * SOCK_STREAM and SOCK_SEQPACKET go to __vsock_connectible_recvmsg(),
 * SOCK_DGRAM goes to __vsock_dgram_recvmsg(); the callee's return value is
 * passed through. Any other socket type yields -EPROTOTYPE.
 */
static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags)
{
	struct socket *sock = sk->sk_socket;

	switch (sk->sk_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		return __vsock_connectible_recvmsg(sock, msg, len, flags);
	case SOCK_DGRAM:
		return __vsock_dgram_recvmsg(sock, msg, len, flags);
	default:
		return -EPROTOTYPE;
	}
}
/*
 * recvmsg handler placed in vsock_bpf_prot.
 *
 * Without a psock the call is handed straight to __vsock_recvmsg(). With a
 * psock, data is pulled through sk_msg_recvmsg(); whenever
 * sk_psock_queue_empty() says the psock holds nothing while data is
 * available, the socket lock and psock reference are dropped and the call
 * falls back to __vsock_recvmsg().
 *
 * Returns the non-zero result of sk_msg_recvmsg(), the result of
 * __vsock_recvmsg() on the fallback paths, -ENODEV when the socket has no
 * transport, or -EAGAIN when vsock_msg_wait_data() reports no data.
 *
 * NOTE(review): vsock_msg_wait_data() is called between lock_sock() and
 * release_sock(), i.e. any sleep happens with the socket lock held - confirm
 * this is intended.
 */
static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
			     size_t len, int flags)
{
	struct sk_psock *psock = sk_psock_get(sk);
	struct vsock_sock *vsk;
	long timeo;
	int copied;

	if (unlikely(!psock))
		return __vsock_recvmsg(sk, msg, len, flags);

	lock_sock(sk);
	vsk = vsock_sk(sk);

	if (WARN_ON_ONCE(!vsk->transport)) {
		copied = -ENODEV;
		goto out;
	}

	/* Data is available but the psock has nothing queued: use the
	 * regular receive path.
	 */
	if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock))
		goto fallback;

	for (;;) {
		copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
		if (copied != 0)
			break;

		timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
		if (!vsock_msg_wait_data(sk, psock, timeo)) {
			copied = -EAGAIN;
			break;
		}

		/* Woken with data, but none of it is queued on the psock. */
		if (sk_psock_queue_empty(psock))
			goto fallback;
	}

out:
	release_sock(sk);
	sk_psock_put(sk, psock);
	return copied;

fallback:
	release_sock(sk);
	sk_psock_put(sk, psock);
	return __vsock_recvmsg(sk, msg, len, flags);
}
/*
 * Initialise @prot as a copy of @base with three handlers replaced:
 * close -> sock_map_close, recvmsg -> vsock_bpf_recvmsg and
 * sock_is_readable -> sk_msg_is_readable. Every other field is taken
 * from @base unchanged.
 */
static void vsock_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
{
	/* Copy first; the overrides below must land on top of the copy. */
	*prot = *base;

	prot->sock_is_readable = sk_msg_is_readable;
	prot->recvmsg = vsock_bpf_recvmsg;
	prot->close = sock_map_close;
}
/*
 * Make sure vsock_bpf_prot was derived from @ops.
 *
 * If @ops differs from the base recorded in vsock_prot_saved, rebuild
 * vsock_bpf_prot from @ops under vsock_prot_lock and record @ops as the
 * new base. The first comparison is done without the lock; it is repeated
 * once the lock is held so that only one of several racing callers
 * performs the rebuild.
 */
static void vsock_bpf_check_needs_rebuild(struct proto *ops)
{
	/* Paired with the smp_store_release() below. */
	if (likely(smp_load_acquire(&vsock_prot_saved) == ops))
		return;

	spin_lock_bh(&vsock_prot_lock);
	if (likely(vsock_prot_saved != ops)) {
		vsock_bpf_rebuild_protos(&vsock_bpf_prot, ops);
		/* Make sure proto function pointers are updated before
		 * publishing the pointer to the struct.
		 */
		smp_store_release(&vsock_prot_saved, ops);
	}
	spin_unlock_bh(&vsock_prot_lock);
}
/*
 * Switch @sk between its original proto and vsock_bpf_prot.
 *
 * @restore == true: put back the write_space callback and proto saved in
 * @psock. Always returns 0.
 *
 * @restore == false: install vsock_bpf_prot on @sk, rebuilding it first if
 * @psock->sk_proto is not the base it was last built from. Returns 0 on
 * success, -ENODEV if the socket has no transport, or -EOPNOTSUPP if the
 * transport provides no read_skb callback.
 */
int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
	struct vsock_sock *vsk;

	if (!restore) {
		vsk = vsock_sk(sk);

		if (!vsk->transport)
			return -ENODEV;

		if (!vsk->transport->read_skb)
			return -EOPNOTSUPP;

		vsock_bpf_check_needs_rebuild(psock->sk_proto);
		sock_replace_proto(sk, &vsock_bpf_prot);
		return 0;
	}

	sk->sk_write_space = psock->saved_write_space;
	sock_replace_proto(sk, psock->sk_proto);
	return 0;
}
/* Init-time setup: build vsock_bpf_prot from vsock_proto so it carries the
 * close/recvmsg/sock_is_readable overrides applied by
 * vsock_bpf_rebuild_protos(). vsock_prot_saved is not updated here.
 */
void __init vsock_bpf_build_proto(void)
{
vsock_bpf_rebuild_protos(&vsock_bpf_prot, &vsock_proto);
}