aboutsummaryrefslogtreecommitdiffstats
path: root/net/packet/af_packet.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/packet/af_packet.c')
-rw-r--r--net/packet/af_packet.c132
1 files changed, 77 insertions, 55 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index fbc775fbf712..8d54f3047768 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -43,13 +44,6 @@
* Chetan Loke : Implemented TPACKET_V3 block abstraction
* layer.
* Copyright (C) 2011, <lokec@ccs.neu.edu>
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/types.h>
@@ -390,7 +384,7 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
smp_wmb();
}
-static int __packet_get_status(struct packet_sock *po, void *frame)
+static int __packet_get_status(const struct packet_sock *po, void *frame)
{
union tpacket_uhdr h;
@@ -466,10 +460,10 @@ static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
return ts_status;
}
-static void *packet_lookup_frame(struct packet_sock *po,
- struct packet_ring_buffer *rb,
- unsigned int position,
- int status)
+static void *packet_lookup_frame(const struct packet_sock *po,
+ const struct packet_ring_buffer *rb,
+ unsigned int position,
+ int status)
{
unsigned int pg_vec_pos, frame_offset;
union tpacket_uhdr h;
@@ -764,7 +758,7 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,
struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
struct sock *sk = &po->sk;
- if (po->stats.stats3.tp_drops)
+ if (atomic_read(&po->tp_drops))
status |= TP_STATUS_LOSING;
last_pkt = (struct tpacket3_hdr *)pkc1->prev;
@@ -1009,7 +1003,6 @@ static void prb_fill_curr_block(char *curr,
/* Assumes caller has the sk->rx_queue.lock */
static void *__packet_lookup_frame_in_block(struct packet_sock *po,
struct sk_buff *skb,
- int status,
unsigned int len
)
{
@@ -1081,7 +1074,7 @@ static void *packet_current_rx_frame(struct packet_sock *po,
po->rx_ring.head, status);
return curr;
case TPACKET_V3:
- return __packet_lookup_frame_in_block(po, skb, status, len);
+ return __packet_lookup_frame_in_block(po, skb, len);
default:
WARN(1, "TPACKET version not supported\n");
BUG();
@@ -1089,10 +1082,10 @@ static void *packet_current_rx_frame(struct packet_sock *po,
}
}
-static void *prb_lookup_block(struct packet_sock *po,
- struct packet_ring_buffer *rb,
- unsigned int idx,
- int status)
+static void *prb_lookup_block(const struct packet_sock *po,
+ const struct packet_ring_buffer *rb,
+ unsigned int idx,
+ int status)
{
struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
@@ -1205,12 +1198,12 @@ static void packet_free_pending(struct packet_sock *po)
#define ROOM_LOW 0x1
#define ROOM_NORMAL 0x2
-static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
+static bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
{
int idx, len;
- len = po->rx_ring.frame_max + 1;
- idx = po->rx_ring.head;
+ len = READ_ONCE(po->rx_ring.frame_max) + 1;
+ idx = READ_ONCE(po->rx_ring.head);
if (pow_off)
idx += len >> pow_off;
if (idx >= len)
@@ -1218,12 +1211,12 @@ static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
}
-static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
+static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
{
int idx, len;
- len = po->rx_ring.prb_bdqc.knum_blocks;
- idx = po->rx_ring.prb_bdqc.kactive_blk_num;
+ len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
+ idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
if (pow_off)
idx += len >> pow_off;
if (idx >= len)
@@ -1231,15 +1224,18 @@ static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
}
-static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
+static int __packet_rcv_has_room(const struct packet_sock *po,
+ const struct sk_buff *skb)
{
- struct sock *sk = &po->sk;
+ const struct sock *sk = &po->sk;
int ret = ROOM_NONE;
if (po->prot_hook.func != tpacket_rcv) {
- int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
- - (skb ? skb->truesize : 0);
- if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
+ int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
+ - (skb ? skb->truesize : 0);
+
+ if (avail > (rcvbuf >> ROOM_POW_OFF))
return ROOM_NORMAL;
else if (avail > 0)
return ROOM_LOW;
@@ -1264,19 +1260,24 @@ static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
{
- int ret;
- bool has_room;
+ int pressure, ret;
- spin_lock_bh(&po->sk.sk_receive_queue.lock);
ret = __packet_rcv_has_room(po, skb);
- has_room = ret == ROOM_NORMAL;
- if (po->pressure == has_room)
- po->pressure = !has_room;
- spin_unlock_bh(&po->sk.sk_receive_queue.lock);
+ pressure = ret != ROOM_NORMAL;
+
+ if (READ_ONCE(po->pressure) != pressure)
+ WRITE_ONCE(po->pressure, pressure);
return ret;
}
+static void packet_rcv_try_clear_pressure(struct packet_sock *po)
+{
+ if (READ_ONCE(po->pressure) &&
+ __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
+ WRITE_ONCE(po->pressure, 0);
+}
+
static void packet_sock_destruct(struct sock *sk)
{
skb_queue_purge(&sk->sk_error_queue);
@@ -1357,7 +1358,7 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f,
i = j = min_t(int, po->rollover->sock, num - 1);
do {
po_next = pkt_sk(f->arr[i]);
- if (po_next != po_skip && !po_next->pressure &&
+ if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
if (i != j)
po->rollover->sock = i;
@@ -2132,10 +2133,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
drop_n_acct:
is_drop_n_account = true;
- spin_lock(&sk->sk_receive_queue.lock);
- po->stats.stats1.tp_drops++;
+ atomic_inc(&po->tp_drops);
atomic_inc(&sk->sk_drops);
- spin_unlock(&sk->sk_receive_queue.lock);
drop_n_restore:
if (skb_head != skb->data && skb_shared(skb)) {
@@ -2199,6 +2198,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (!res)
goto drop_n_restore;
+ /* If we are flooded, just give up */
+ if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
+ atomic_inc(&po->tp_drops);
+ goto drop_n_restore;
+ }
+
if (skb->ip_summed == CHECKSUM_PARTIAL)
status |= TP_STATUS_CSUMNOTREADY;
else if (skb->pkt_type != PACKET_OUTGOING &&
@@ -2269,7 +2274,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
* Anyways, moving it for V1/V2 only as V3 doesn't need this
* at packet level.
*/
- if (po->stats.stats1.tp_drops)
+ if (atomic_read(&po->tp_drops))
status |= TP_STATUS_LOSING;
}
@@ -2385,9 +2390,9 @@ drop:
return 0;
drop_n_account:
- is_drop_n_account = true;
- po->stats.stats1.tp_drops++;
spin_unlock(&sk->sk_receive_queue.lock);
+ atomic_inc(&po->tp_drops);
+ is_drop_n_account = true;
sk->sk_data_ready(sk);
kfree_skb(copy_skb);
@@ -2407,6 +2412,9 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
ts = __packet_set_timestamp(po, ph, skb);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
+
+ if (!packet_read_pending(&po->tx_ring))
+ complete(&po->skb_completion);
}
sock_wfree(skb);
@@ -2591,7 +2599,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
struct net_device *dev;
struct virtio_net_hdr *vnet_hdr = NULL;
struct sockcm_cookie sockc;
@@ -2606,6 +2614,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
int hlen, tlen, copylen = 0;
+ long timeo = 0;
mutex_lock(&po->pg_vec_lock);
@@ -2652,12 +2661,21 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
size_max = dev->mtu + reserve + VLAN_HLEN;
+ reinit_completion(&po->skb_completion);
+
do {
ph = packet_current_frame(po, &po->tx_ring,
TP_STATUS_SEND_REQUEST);
if (unlikely(ph == NULL)) {
- if (need_wait && need_resched())
- schedule();
+ if (need_wait && skb) {
+ timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
+ timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
+ if (timeo <= 0) {
+ err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
+ goto out_put;
+ }
+ }
+ /* check for additional frames */
continue;
}
@@ -3014,8 +3032,8 @@ static int packet_release(struct socket *sock)
synchronize_net();
+ kfree(po->rollover);
if (f) {
- kfree(po->rollover);
fanout_release_data(f);
kfree(f);
}
@@ -3213,6 +3231,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
sock_init_data(sock, sk);
po = pkt_sk(sk);
+ init_completion(&po->skb_completion);
sk->sk_family = PF_PACKET;
po->num = proto;
po->xmit = dev_queue_xmit;
@@ -3310,8 +3329,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (skb == NULL)
goto out;
- if (pkt_sk(sk)->pressure)
- packet_rcv_has_room(pkt_sk(sk), NULL);
+ packet_rcv_try_clear_pressure(pkt_sk(sk));
if (pkt_sk(sk)->has_vnet_hdr) {
err = packet_rcv_vnet(msg, skb, &len);
@@ -3883,6 +3901,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
+ int drops;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3899,14 +3918,17 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
memcpy(&st, &po->stats, sizeof(st));
memset(&po->stats, 0, sizeof(po->stats));
spin_unlock_bh(&sk->sk_receive_queue.lock);
+ drops = atomic_xchg(&po->tp_drops, 0);
if (po->tp_version == TPACKET_V3) {
lv = sizeof(struct tpacket_stats_v3);
- st.stats3.tp_packets += st.stats3.tp_drops;
+ st.stats3.tp_drops = drops;
+ st.stats3.tp_packets += drops;
data = &st.stats3;
} else {
lv = sizeof(struct tpacket_stats);
- st.stats1.tp_packets += st.stats1.tp_drops;
+ st.stats1.tp_drops = drops;
+ st.stats1.tp_packets += drops;
data = &st.stats1;
}
@@ -4125,8 +4147,7 @@ static __poll_t packet_poll(struct file *file, struct socket *sock,
TP_STATUS_KERNEL))
mask |= EPOLLIN | EPOLLRDNORM;
}
- if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
- po->pressure = 0;
+ packet_rcv_try_clear_pressure(po);
spin_unlock_bh(&sk->sk_receive_queue.lock);
spin_lock_bh(&sk->sk_write_queue.lock);
if (po->tx_ring.pg_vec) {
@@ -4320,7 +4341,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
req3->tp_sizeof_priv ||
req3->tp_feature_req_word) {
err = -EINVAL;
- goto out;
+ goto out_free_pg_vec;
}
}
break;
@@ -4384,6 +4405,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
prb_shutdown_retire_blk_timer(po, rb_queue);
}
+out_free_pg_vec:
if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out: