aboutsummaryrefslogtreecommitdiffstats
path: root/net/packet/af_packet.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/packet/af_packet.c')
-rw-r--r--net/packet/af_packet.c135
1 files changed, 81 insertions, 54 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 46943a18a10d..6ce8dd19f33c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -49,6 +49,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/ethtool.h>
+#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
@@ -459,7 +460,7 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
return TP_STATUS_TS_RAW_HARDWARE;
if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
- ktime_to_timespec64_cond(skb->tstamp, ts))
+ ktime_to_timespec64_cond(skb_tstamp(skb), ts))
return TP_STATUS_TS_SOFTWARE;
return 0;
@@ -1349,7 +1350,7 @@ static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
if (READ_ONCE(history[i]) == rxhash)
count++;
- victim = prandom_u32() % ROLLOVER_HLEN;
+ victim = prandom_u32_max(ROLLOVER_HLEN);
/* Avoid dirtying the cache line if possible */
if (READ_ONCE(history[victim]) != rxhash)
@@ -1773,6 +1774,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
+ match->prot_hook.af_packet_net = read_pnet(&match->net);
match->prot_hook.id_match = match_fanout_group;
match->max_num_members = args->max_num_members;
list_add(&match->list, &fanout_list);
@@ -1787,7 +1789,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
err = -ENOSPC;
if (refcount_read(&match->sk_ref) < match->max_num_members) {
__dev_remove_pack(&po->prot_hook);
- po->fanout = match;
+
+ /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
+ WRITE_ONCE(po->fanout, match);
+
po->rollover = rollover;
rollover = NULL;
refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
@@ -1900,7 +1905,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
*/
spkt->spkt_family = dev->type;
- strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
+ strscpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
spkt->spkt_protocol = skb->protocol;
/*
@@ -1919,12 +1924,22 @@ oom:
static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
{
+ int depth;
+
if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
sock->type == SOCK_RAW) {
skb_reset_mac_header(skb);
skb->protocol = dev_parse_header_protocol(skb);
}
+ /* Move network header to the right position for VLAN tagged packets */
+ if (likely(skb->dev->type == ARPHRD_ETHER) &&
+ eth_type_vlan(skb->protocol) &&
+ __vlan_get_protocol(skb, skb->protocol, &depth) != 0) {
+ if (pskb_may_pull(skb, depth))
+ skb_set_network_header(skb, depth);
+ }
+
skb_probe_transport_header(skb);
}
@@ -2194,6 +2209,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
spin_lock(&sk->sk_receive_queue.lock);
po->stats.stats1.tp_packets++;
sock_skb_set_dropcount(sk, skb);
+ skb_clear_delivery_time(skb);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk);
@@ -2312,8 +2328,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
copy_skb = skb_get(skb);
skb_head = skb->data;
}
- if (copy_skb)
+ if (copy_skb) {
+ memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
+ sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
skb_set_owner_r(copy_skb, sk);
+ }
}
snaplen = po->rx_ring.frame_size - macoff;
if ((int)snaplen < 0) {
@@ -2372,6 +2391,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
po->stats.stats1.tp_packets++;
if (copy_skb) {
status |= TP_STATUS_COPY;
+ skb_clear_delivery_time(copy_skb);
__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
}
spin_unlock(&sk->sk_receive_queue.lock);
@@ -2848,8 +2868,9 @@ tpacket_error:
status = TP_STATUS_SEND_REQUEST;
err = po->xmit(skb);
- if (unlikely(err > 0)) {
- err = net_xmit_errno(err);
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
if (err && __packet_get_status(po, ph) ==
TP_STATUS_AVAILABLE) {
/* skb was destructed already */
@@ -3016,8 +3037,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (err)
goto out_free;
- if (sock->type == SOCK_RAW &&
- !dev_validate_header(dev, skb->data, len)) {
+ if ((sock->type == SOCK_RAW &&
+ !dev_validate_header(dev, skb->data, len)) || !skb->len) {
err = -EINVAL;
goto out_free;
}
@@ -3036,6 +3057,11 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->mark = sockc.mark;
skb->tstamp = sockc.transmit_time;
+ if (unlikely(extra_len == 4))
+ skb->no_fcs = 1;
+
+ packet_parse_headers(skb, sock);
+
if (has_vnet_hdr) {
err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
if (err)
@@ -3044,14 +3070,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
virtio_net_hdr_set_proto(skb, &vnet_hdr);
}
- packet_parse_headers(skb, sock);
-
- if (unlikely(extra_len == 4))
- skb->no_fcs = 1;
-
err = po->xmit(skb);
- if (err > 0 && (err = net_xmit_errno(err)) != 0)
- goto out_unlock;
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
+ if (err)
+ goto out_unlock;
+ }
dev_put(dev);
@@ -3102,16 +3127,14 @@ static int packet_release(struct socket *sock)
sk_del_node_init_rcu(sk);
mutex_unlock(&net->packet.sklist_lock);
- preempt_disable();
sock_prot_inuse_add(net, sk->sk_prot, -1);
- preempt_enable();
spin_lock(&po->bind_lock);
unregister_prot_hook(sk, false);
packet_cached_dev_reset(po);
if (po->prot_hook.dev) {
- dev_put(po->prot_hook.dev);
+ netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker);
po->prot_hook.dev = NULL;
}
spin_unlock(&po->bind_lock);
@@ -3163,12 +3186,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
__be16 proto)
{
struct packet_sock *po = pkt_sk(sk);
- struct net_device *dev_curr;
- __be16 proto_curr;
- bool need_rehook;
struct net_device *dev = NULL;
- int ret = 0;
bool unlisted = false;
+ bool need_rehook;
+ int ret = 0;
lock_sock(sk);
spin_lock(&po->bind_lock);
@@ -3193,14 +3214,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
}
}
- dev_hold(dev);
-
- proto_curr = po->prot_hook.type;
- dev_curr = po->prot_hook.dev;
-
- need_rehook = proto_curr != proto || dev_curr != dev;
+ need_rehook = po->prot_hook.type != proto || po->prot_hook.dev != dev;
if (need_rehook) {
+ dev_hold(dev);
if (po->running) {
rcu_read_unlock();
/* prevents packet_notifier() from calling
@@ -3209,7 +3226,6 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
WRITE_ONCE(po->num, 0);
__unregister_prot_hook(sk, true);
rcu_read_lock();
- dev_curr = po->prot_hook.dev;
if (dev)
unlisted = !dev_get_by_index_rcu(sock_net(sk),
dev->ifindex);
@@ -3219,18 +3235,21 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
WRITE_ONCE(po->num, proto);
po->prot_hook.type = proto;
+ netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker);
+
if (unlikely(unlisted)) {
- dev_put(dev);
po->prot_hook.dev = NULL;
WRITE_ONCE(po->ifindex, -1);
packet_cached_dev_reset(po);
} else {
+ netdev_hold(dev, &po->prot_hook.dev_tracker,
+ GFP_ATOMIC);
po->prot_hook.dev = dev;
WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
packet_cached_dev_assign(po, dev);
}
+ dev_put(dev);
}
- dev_put(dev_curr);
if (proto == 0 || !need_rehook)
goto out_unlock;
@@ -3358,6 +3377,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
po->prot_hook.func = packet_rcv_spkt;
po->prot_hook.af_packet_priv = sk;
+ po->prot_hook.af_packet_net = sock_net(sk);
if (proto) {
po->prot_hook.type = proto;
@@ -3368,9 +3388,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
sk_add_node_tail_rcu(sk, &net->packet.sklist);
mutex_unlock(&net->packet.sklist_lock);
- preempt_disable();
sock_prot_inuse_add(net, &packet_proto, 1);
- preempt_enable();
return 0;
out2:
@@ -3418,7 +3436,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
* but then it will block.
*/
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
/*
* An error occurred so return it. Because skb_recv_datagram()
@@ -3461,9 +3479,11 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
sll->sll_protocol = skb->protocol;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name) {
+ const size_t max_len = min(sizeof(skb->cb),
+ sizeof(struct sockaddr_storage));
int copy_len;
/* If the address length field is there to be filled
@@ -3486,6 +3506,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
msg->msg_namelen = sizeof(struct sockaddr_ll);
}
}
+ if (WARN_ON_ONCE(copy_len > max_len)) {
+ copy_len = max_len;
+ msg->msg_namelen = copy_len;
+ }
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
@@ -3541,7 +3565,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
if (dev)
- strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
+ strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
return sizeof(*uaddr);
@@ -3939,7 +3963,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
}
case PACKET_FANOUT_DATA:
{
- if (!po->fanout)
+ /* Paired with the WRITE_ONCE() in fanout_add() */
+ if (!READ_ONCE(po->fanout))
return -EINVAL;
return fanout_set_data(po, optval, optlen);
@@ -4142,7 +4167,8 @@ static int packet_notifier(struct notifier_block *this,
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
WRITE_ONCE(po->ifindex, -1);
- dev_put(po->prot_hook.dev);
+ netdev_put(po->prot_hook.dev,
+ &po->prot_hook.dev_tracker);
po->prot_hook.dev = NULL;
}
spin_unlock(&po->bind_lock);
@@ -4492,9 +4518,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
}
out_free_pg_vec:
- bitmap_free(rx_owner_map);
- if (pg_vec)
+ if (pg_vec) {
+ bitmap_free(rx_owner_map);
free_pg_vec(pg_vec, order, req->tp_block_nr);
+ }
out:
return err;
}
@@ -4698,37 +4725,37 @@ static struct pernet_operations packet_net_ops = {
static void __exit packet_exit(void)
{
- unregister_netdevice_notifier(&packet_netdev_notifier);
- unregister_pernet_subsys(&packet_net_ops);
sock_unregister(PF_PACKET);
proto_unregister(&packet_proto);
+ unregister_netdevice_notifier(&packet_netdev_notifier);
+ unregister_pernet_subsys(&packet_net_ops);
}
static int __init packet_init(void)
{
int rc;
- rc = proto_register(&packet_proto, 0);
- if (rc)
- goto out;
- rc = sock_register(&packet_family_ops);
- if (rc)
- goto out_proto;
rc = register_pernet_subsys(&packet_net_ops);
if (rc)
- goto out_sock;
+ goto out;
rc = register_netdevice_notifier(&packet_netdev_notifier);
if (rc)
goto out_pernet;
+ rc = proto_register(&packet_proto, 0);
+ if (rc)
+ goto out_notifier;
+ rc = sock_register(&packet_family_ops);
+ if (rc)
+ goto out_proto;
return 0;
-out_pernet:
- unregister_pernet_subsys(&packet_net_ops);
-out_sock:
- sock_unregister(PF_PACKET);
out_proto:
proto_unregister(&packet_proto);
+out_notifier:
+ unregister_netdevice_notifier(&packet_netdev_notifier);
+out_pernet:
+ unregister_pernet_subsys(&packet_net_ops);
out:
return rc;
}