af_unix: Use cached value for SOCK_STREAM in unix_inq_len().

Compared to TCP, ioctl(SIOCINQ) for AF_UNIX SOCK_STREAM socket is more
expensive, as unix_inq_len() requires iterating through the receive queue
and accumulating skb->len.

Let's cache the value for SOCK_STREAM to a new field during sendmsg()
and recvmsg().

The field is protected by the receive queue lock.

Note that ioctl(SIOCINQ) for SOCK_DGRAM returns the length of the first
skb in the queue.

SOCK_SEQPACKET still requires iterating through the queue because we do
not touch functions shared with unix_dgram_ops.  But, if really needed,
we can support it by switching __skb_try_recv_datagram() to a custom
version.

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20250702223606.1054680-5-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Kuniyuki Iwashima
2025-07-02 22:35:16 +00:00
committed by Jakub Kicinski
parent d0aac85449
commit f4e1fb04c1
2 changed files with 29 additions and 10 deletions

View File

@@ -47,6 +47,7 @@ struct unix_sock {
#define peer_wait peer_wq.wait
wait_queue_entry_t peer_wake;
struct scm_stat scm_stat;
int inq_len;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
struct sk_buff *oob_skb;
#endif

View File

@@ -2297,6 +2297,7 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
spin_lock(&other->sk_receive_queue.lock);
WRITE_ONCE(ousk->oob_skb, skb);
WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1);
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);
@@ -2319,6 +2320,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
struct sock *sk = sock->sk;
struct sk_buff *skb = NULL;
struct sock *other = NULL;
struct unix_sock *otheru;
struct scm_cookie scm;
bool fds_sent = false;
int err, sent = 0;
@@ -2342,14 +2344,16 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_namelen) {
err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
goto out_err;
} else {
other = unix_peer(sk);
if (!other) {
err = -ENOTCONN;
goto out_err;
}
}
other = unix_peer(sk);
if (!other) {
err = -ENOTCONN;
goto out_err;
}
otheru = unix_sk(other);
if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
goto out_pipe;
@@ -2417,7 +2421,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
unix_maybe_add_creds(skb, sk, other);
scm_stat_add(other, skb);
skb_queue_tail(&other->sk_receive_queue, skb);
spin_lock(&other->sk_receive_queue.lock);
WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len);
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
other->sk_data_ready(other);
sent += size;
@@ -2704,6 +2713,7 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
if (!(state->flags & MSG_PEEK)) {
WRITE_ONCE(u->oob_skb, NULL);
WRITE_ONCE(u->inq_len, u->inq_len - 1);
if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
!unix_skb_len(oob_skb->prev)) {
@@ -2808,6 +2818,8 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
return -EAGAIN;
}
WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
if (skb == u->oob_skb) {
WRITE_ONCE(u->oob_skb, NULL);
@@ -2988,7 +3000,11 @@ unlock:
if (unix_skb_len(skb))
break;
skb_unlink(skb, &sk->sk_receive_queue);
spin_lock(&sk->sk_receive_queue.lock);
WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
__skb_unlink(skb, &sk->sk_receive_queue);
spin_unlock(&sk->sk_receive_queue.lock);
consume_skb(skb);
if (scm.fp)
@@ -3159,9 +3175,11 @@ long unix_inq_len(struct sock *sk)
if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
return -EINVAL;
if (sk->sk_type == SOCK_STREAM)
return READ_ONCE(unix_sk(sk)->inq_len);
spin_lock(&sk->sk_receive_queue.lock);
if (sk->sk_type == SOCK_STREAM ||
sk->sk_type == SOCK_SEQPACKET) {
if (sk->sk_type == SOCK_SEQPACKET) {
skb_queue_walk(&sk->sk_receive_queue, skb)
amount += unix_skb_len(skb);
} else {