diff options
43 files changed, 476 insertions, 179 deletions
diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 84ee60fceef2..fd96e4a3cef9 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -431,7 +431,7 @@ patchwork checks Checks in patchwork are mostly simple wrappers around existing kernel scripts, the sources are available at: -https://github.com/kuba-moo/nipa/tree/master/tests +https://github.com/linux-netdev/nipa/tree/master/tests **Do not** post your patches just to run them through the checks. You must ensure that your patches are ready by testing them locally diff --git a/MAINTAINERS b/MAINTAINERS index a0697e2fb8e8..466a0fc46f76 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15237,6 +15237,8 @@ F: Documentation/networking/ F: Documentation/networking/net_cachelines/ F: Documentation/process/maintainer-netdev.rst F: Documentation/userspace-api/netlink/ +F: include/linux/framer/framer-provider.h +F: include/linux/framer/framer.h F: include/linux/in.h F: include/linux/indirect_call_wrapper.h F: include/linux/net.h diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index ab60a71a8dcb..472f0263dbc6 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -4,6 +4,7 @@ #include <linux/seqlock.h> #include <uapi/asm/vsyscall.h> +#include <asm/page_types.h> #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); @@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code, } #endif +/* + * The (legacy) vsyscall page is the long page in the kernel portion + * of the address space that has user-accessible permissions. + */ +static inline bool is_vsyscall_vaddr(unsigned long vaddr) +{ + return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); +} + #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 679b09cfe241..d6375b3c633b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -798,15 +798,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, show_opcodes(regs, loglvl); } -/* - * The (legacy) vsyscall page is the long page in the kernel portion - * of the address space that has user-accessible permissions. - */ -static bool is_vsyscall_vaddr(unsigned long vaddr) -{ - return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); -} - static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 pkey, int si_code) diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index 6993f026adec..42115ac079cf 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -3,6 +3,8 @@ #include <linux/uaccess.h> #include <linux/kernel.h> +#include <asm/vsyscall.h> + #ifdef CONFIG_X86_64 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { @@ -16,6 +18,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) return false; /* + * Reading from the vsyscall page may cause an unhandled fault in + * certain cases. Though it is at an address above TASK_SIZE_MAX, it is + * usually considered as a user space address. + */ + if (is_vsyscall_vaddr(vaddr)) + return false; + + /* * Allow everything during early boot before 'x86_virt_bits' * is initialized. Needed for instruction decoding in early * exception handlers. diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index e5d6156655ba..516adb50f9f6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -415,6 +415,10 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, return; } + /* AF modifies given action iff PF/VF has requested for it */ + if ((entry->action & 0xFULL) != NIX_RX_ACTION_DEFAULT) + return; + /* copy VF default entry action to the VF mcam entry */ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr, target_func); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index d1f7fc8b1b71..3c066b62e689 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -757,6 +757,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sparx5); sparx5->pdev = pdev; sparx5->dev = &pdev->dev; + spin_lock_init(&sparx5->tx_lock); /* Do switch core reset if available */ reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch"); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 6f565c0c0c3d..316fed5f2735 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -280,6 +280,7 @@ struct sparx5 { int xtr_irq; /* Frame DMA */ int fdma_irq; + spinlock_t tx_lock; /* lock for frame transmission */ struct sparx5_rx rx; struct sparx5_tx tx; /* PTP */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 6db6ac6a3bbc..ac7e1cffbcec 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -244,10 +244,12 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) } skb_tx_timestamp(skb); + spin_lock(&sparx5->tx_lock); if (sparx5->fdma_irq > 0) ret = sparx5_fdma_xmit(sparx5, ifh, skb); else ret = sparx5_inject(sparx5, ifh, skb, dev); + spin_unlock(&sparx5->tx_lock); if (ret == -EBUSY) goto busy; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 1bd34b2a47e8..29367105df54 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -224,7 +224,7 @@ static const struct stmmac_hwif_entry { .regs = { .ptp_off = PTP_GMAC4_OFFSET, .mmc_off = MMC_GMAC4_OFFSET, - .est_off = EST_XGMAC_OFFSET, + .est_off = EST_GMAC4_OFFSET, }, .desc = &dwmac4_desc_ops, .dma = &dwmac410_dma_ops, diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index b1919278e931..2129ae42c703 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1907,20 +1907,20 @@ static int __init gtp_init(void) if (err < 0) goto error_out; - err = genl_register_family(>p_genl_family); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto unreg_rtnl_link; - err = register_pernet_subsys(>p_net_ops); + err = genl_register_family(>p_genl_family); if (err < 0) - goto unreg_genl_family; + goto unreg_pernet_subsys; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_genl_family: - genl_unregister_family(>p_genl_family); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); error_out: diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index 4bc05948f772..a78c692f2d3c 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -212,7 +212,7 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt) u32 unit_count; u32 unit; - unit_count = roundup(ipa->endpoint_count, 32); + unit_count = DIV_ROUND_UP(ipa->endpoint_count, 32); for (unit = 0; unit < unit_count; unit++) { const struct reg *reg; u32 val; diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 894172a3e15f..337899c69738 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -421,9 +421,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) ERR_PTR(ret)); return ret; } + + return genphy_soft_reset(phydev); } - return genphy_soft_reset(phydev); + return 0; } static int rtl821x_suspend(struct phy_device *phydev) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index be72824f32b2..d19cd863d294 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1101,6 +1101,7 @@ struct bpf_hrtimer { struct bpf_prog *prog; void __rcu *callback_fn; void *value; + struct rcu_head rcu; }; /* the actual struct hidden inside uapi struct bpf_timer */ @@ -1332,6 +1333,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) if (in_nmi()) return -EOPNOTSUPP; + rcu_read_lock(); __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (!t) { @@ -1353,6 +1355,7 @@ out: * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + rcu_read_unlock(); return ret; } @@ -1407,7 +1410,7 @@ out: */ if (this_cpu_read(hrtimer_running) != t) hrtimer_cancel(&t->timer); - kfree(t); + kfree_rcu(t, rcu); } BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index e5c3500443c6..ec4e97c61eef 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -978,6 +978,8 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) != __alignof__(struct bpf_iter_task)); + kit->pos = NULL; + switch (flags) { case BPF_TASK_ITER_ALL_THREADS: case BPF_TASK_ITER_ALL_PROCS: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 65f598694d55..b263f093ee76 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5227,7 +5227,9 @@ BTF_ID(struct, prog_test_ref_kfunc) #ifdef CONFIG_CGROUPS BTF_ID(struct, cgroup) #endif +#ifdef CONFIG_BPF_JIT BTF_ID(struct, bpf_cpumask) +#endif BTF_ID(struct, task_struct) BTF_SET_END(rcu_protected_types) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 93ecfceac1bc..4d75ef9d24bf 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) - psock->saved_data_ready(sk); + if (psock) { + read_lock_bh(&sk->sk_callback_lock); + sk_psock_data_ready(sk, psock); + read_unlock_bh(&sk->sk_callback_lock); + } rcu_read_unlock(); } } diff --git a/net/core/sock.c b/net/core/sock.c index 0a7f46c37f0c..5e78798456fd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1188,6 +1188,17 @@ int sk_setsockopt(struct sock *sk, int level, int optname, */ WRITE_ONCE(sk->sk_txrehash, (u8)val); return 0; + case SO_PEEK_OFF: + { + int (*set_peek_off)(struct sock *sk, int val); + + set_peek_off = READ_ONCE(sock->ops)->set_peek_off; + if (set_peek_off) + ret = set_peek_off(sk, val); + else + ret = -EOPNOTSUPP; + return ret; + } } sockopt_lock_sock(sk); @@ -1430,18 +1441,6 @@ set_sndbuf: sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); break; - case SO_PEEK_OFF: - { - int (*set_peek_off)(struct sock *sk, int val); - - set_peek_off = READ_ONCE(sock->ops)->set_peek_off; - if (set_peek_off) - ret = set_peek_off(sk, val); - else - ret = -EOPNOTSUPP; - break; - } - case SO_NOFCS: sock_valbool_flag(sk, SOCK_NOFCS, valbool); break; diff --git a/net/devlink/core.c b/net/devlink/core.c index 6a58342752b4..7f0b093208d7 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -529,14 +529,20 @@ static int __init devlink_init(void) { int err; - err = genl_register_family(&devlink_nl_family); - if (err) - goto out; err = register_pernet_subsys(&devlink_pernet_ops); if (err) goto out; + err = genl_register_family(&devlink_nl_family); + if (err) + goto out_unreg_pernet_subsys; err = register_netdevice_notifier(&devlink_port_netdevice_nb); + if (!err) + return 0; + + genl_unregister_family(&devlink_nl_family); +out_unreg_pernet_subsys: + unregister_pernet_subsys(&devlink_pernet_ops); out: WARN_ON(err); return err; diff --git a/net/devlink/port.c b/net/devlink/port.c index 78592912f657..4b2d46ccfe48 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -583,7 +583,7 @@ devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink, xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) { err = devlink_nl_port_fill(msg, devlink_port, - DEVLINK_CMD_NEW, + DEVLINK_CMD_PORT_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, cb->extack); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9456f5bb35e5..0d0d725b46ad 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) if (neigh) { if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + memcpy(r->arp_ha.sa_data, neigh->ha, + min(dev->addr_len, sizeof(r->arp_ha.sa_data_min))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f631b0a21af4..e474b201900f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1589,12 +1589,7 @@ int udp_init_sock(struct sock *sk) void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) { - if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) { - bool slow = lock_sock_fast(sk); - - sk_peek_offset_bwd(sk, len); - unlock_sock_fast(sk, slow); - } + sk_peek_offset_bwd(sk, len); if (!skb_unref(skb)) return; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4952ae792450..02e9ffb63af1 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -177,6 +177,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_IOAM: if (!ipv6_hop_ioam(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) @@ -943,6 +945,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); + /* About to mangle packet header */ + if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) + goto drop; + + /* Trace pointer may have changed */ + trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + + optoff + sizeof(*hdr)); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 29346a6eec9f..35508abd76f4 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -512,22 +512,24 @@ int __init seg6_init(void) { int err; - err = genl_register_family(&seg6_genl_family); + err = register_pernet_subsys(&ip6_segments_ops); if (err) goto out; - err = register_pernet_subsys(&ip6_segments_ops); + err = genl_register_family(&seg6_genl_family); if (err) - goto out_unregister_genl; + goto out_unregister_pernet; #ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) - goto out_unregister_pernet; + goto out_unregister_genl; err = seg6_local_init(); - if (err) - goto out_unregister_pernet; + if (err) { + seg6_iptunnel_exit(); + goto out_unregister_genl; + } #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -548,11 +550,11 @@ out_unregister_iptun: #endif #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL -out_unregister_pernet: - unregister_pernet_subsys(&ip6_segments_ops); -#endif out_unregister_genl: genl_unregister_family(&seg6_genl_family); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); goto out; } diff --git a/net/mctp/route.c b/net/mctp/route.c index 7a47a58aa54b..6218dcd07e18 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, spin_unlock_irqrestore(&mns->keys_lock, flags); if (!tagbits) { - kfree(key); + mctp_key_unref(key); return ERR_PTR(-EBUSY); } diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 3aa50dc7535b..976fe250b509 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -34,10 +34,10 @@ static int pn_ioctl(struct sock *sk, int cmd, int *karg) switch (cmd) { case SIOCINQ: - lock_sock(sk); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); *karg = skb ? skb->len : 0; - release_sock(sk); + spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; case SIOCPNADDRESOURCE: diff --git a/net/phonet/pep.c b/net/phonet/pep.c index faba31f2eff2..3dd5f52bc1b5 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len) return 0; } +static unsigned int pep_first_packet_length(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff_head *q; + struct sk_buff *skb; + unsigned int len = 0; + bool found = false; + + if (sock_flag(sk, SOCK_URGINLINE)) { + q = &pn->ctrlreq_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) { + len = skb->len; + found = true; + } + spin_unlock_bh(&q->lock); + } + + if (likely(!found)) { + q = &sk->sk_receive_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) + len = skb->len; + spin_unlock_bh(&q->lock); + } + + return len; +} + static int pep_ioctl(struct sock *sk, int cmd, int *karg) { struct pep_sock *pn = pep_sk(sk); @@ -929,15 +960,7 @@ static int pep_ioctl(struct sock *sk, int cmd, int *karg) break; } - lock_sock(sk); - if (sock_flag(sk, SOCK_URGINLINE) && - !skb_queue_empty(&pn->ctrlreq_queue)) - *karg = skb_peek(&pn->ctrlreq_queue)->len; - else if (!skb_queue_empty(&sk->sk_receive_queue)) - *karg = skb_peek(&sk->sk_receive_queue)->len; - else - *karg = 0; - release_sock(sk); + *karg = pep_first_packet_length(sk); ret = 0; break; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index efb9d2811b73..6ee7064c82fc 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2460,8 +2460,11 @@ unbind_filter: } errout_idr: - if (!fold) + if (!fold) { + spin_lock(&tp->lock); idr_remove(&head->handle_idr, fnew->handle); + spin_unlock(&tp->lock); + } __fl_put(fnew); errout_tb: kfree(tb); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9fbc70200cd0..de96959336c4 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1772,7 +1772,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1785,7 +1786,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1803,12 +1804,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1844,6 +1845,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -1947,6 +1952,7 @@ int tls_sw_recvmsg(struct sock *sk, int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -1966,12 +1972,12 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; - if (len <= copied) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2064,6 +2070,8 @@ put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } @@ -2128,10 +2136,10 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + decrypted, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); + async_copy_bytes, is_peek, NULL); } copied += decrypted; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 30b178ebba60..0748e7ea5210 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -782,19 +782,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, int); -static int unix_set_peek_off(struct sock *sk, int val) -{ - struct unix_sock *u = unix_sk(sk); - - if (mutex_lock_interruptible(&u->iolock)) - return -EINTR; - - WRITE_ONCE(sk->sk_peek_off, val); - mutex_unlock(&u->iolock); - - return 0; -} - #ifdef CONFIG_PROC_FS static int unix_count_nr_fds(struct sock *sk) { @@ -862,7 +849,7 @@ static const struct proto_ops unix_stream_ops = { .read_skb = unix_stream_read_skb, .mmap = sock_no_mmap, .splice_read = unix_stream_splice_read, - .set_peek_off = unix_set_peek_off, + .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -886,7 +873,7 @@ static const struct proto_ops unix_dgram_ops = { .read_skb = unix_read_skb, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, - .set_peek_off = unix_set_peek_off, + .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -909,7 +896,7 @@ static const struct proto_ops unix_seqpacket_ops = { .sendmsg = unix_seqpacket_sendmsg, .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, - .set_peek_off = unix_set_peek_off, + .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 2ff7ddbaa782..2a81880dac7b 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -284,9 +284,17 @@ void unix_gc(void) * which are creating the cycle(s). */ skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) + list_for_each_entry(u, &gc_candidates, link) { scan_children(&u->sk, inc_inflight, &hitlist); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif + } + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ @@ -314,18 +322,6 @@ void unix_gc(void) /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); -#if IS_ENABLED(CONFIG_AF_UNIX_OOB) - while (!list_empty(&gc_candidates)) { - u = list_entry(gc_candidates.next, struct unix_sock, link); - if (u->oob_skb) { - struct sk_buff *skb = u->oob_skb; - - u->oob_skb = NULL; - kfree_skb(skb); - } - } -#endif - spin_lock(&unix_gc_lock); /* There could be io_uring registered files, just push them back to diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 1eadfac03cc4..b78c0e095e22 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -722,7 +722,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, memcpy(vaddr, buffer, len); kunmap_local(vaddr); - skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); + skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE); + refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc); } if (first_frag && desc->options & XDP_TX_METADATA) { diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 61b7dddedc46..0669bac5e900 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -513,7 +513,7 @@ eBPF programs can have an associated license, passed along with the bytecode instructions to the kernel when the programs are loaded. The format for that string is identical to the one in use for kernel modules (Dual licenses, such as "Dual BSD/GPL", may be used). Some helper functions are only accessible to -programs that are compatible with the GNU Privacy License (GPL). +programs that are compatible with the GNU General Public License (GNU GPL). In order to use such helpers, the eBPF program must be loaded with the correct license string passed (via **attr**) to the **bpf**\\ () system call, and this diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index c82a7f41b31c..6e6d474c8366 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -466,6 +466,8 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version) int ynl_recv_ack(struct ynl_sock *ys, int ret) { + struct ynl_parse_arg yarg = { .ys = ys, }; + if (!ret) { yerr(ys, YNL_ERROR_EXPECT_ACK, "Expecting an ACK but nothing received"); @@ -478,7 +480,7 @@ int ynl_recv_ack(struct ynl_sock *ys, int ret) return ret; } return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid, - ynl_cb_null, ys); + ynl_cb_null, &yarg); } int ynl_cb_null(const struct nlmsghdr *nlh, void *data) @@ -586,7 +588,13 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) return err; } - return ynl_recv_ack(ys, err); + err = ynl_recv_ack(ys, err); + if (err < 0) { + free(ys->mcast_groups); + return err; + } + + return 0; } struct ynl_sock * @@ -741,11 +749,14 @@ err_free: static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data) { - return ynl_ntf_parse((struct ynl_sock *)data, nlh); + struct ynl_parse_arg *yarg = data; + + return ynl_ntf_parse(yarg->ys, nlh); } int ynl_ntf_check(struct ynl_sock *ys) { + struct ynl_parse_arg yarg = { .ys = ys, }; ssize_t len; int err; @@ -767,7 +778,7 @@ int ynl_ntf_check(struct ynl_sock *ys) return len; err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, - ynl_ntf_trampoline, ys, + ynl_ntf_trampoline, &yarg, ynl_cb_array, NLMSG_MIN_TYPE); if (err < 0) return err; diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index bf84d4a1d9ae..3c440370c1f0 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -193,6 +193,7 @@ static void subtest_task_iters(void) ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt"); ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt"); ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt"); + ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt"); pthread_mutex_unlock(&do_nothing_mutex); for (int i = 0; i < thread_num; i++) ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join"); diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c new file mode 100644 index 000000000000..3405923fe4e6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */ +#include "test_progs.h" +#include "read_vsyscall.skel.h" + +#if defined(__x86_64__) +/* For VSYSCALL_ADDR */ +#include <asm/vsyscall.h> +#else +/* To prevent build failure on non-x86 arch */ +#define VSYSCALL_ADDR 0UL +#endif + +struct read_ret_desc { + const char *name; + int ret; +} all_read[] = { + { .name = "probe_read_kernel", .ret = -ERANGE }, + { .name = "probe_read_kernel_str", .ret = -ERANGE }, + { .name = "probe_read", .ret = -ERANGE }, + { .name = "probe_read_str", .ret = -ERANGE }, + { .name = "probe_read_user", .ret = -EFAULT }, + { .name = "probe_read_user_str", .ret = -EFAULT }, + { .name = "copy_from_user", .ret = -EFAULT }, + { .name = "copy_from_user_task", .ret = -EFAULT }, +}; + +void test_read_vsyscall(void) +{ + struct read_vsyscall *skel; + unsigned int i; + int err; + +#if !defined(__x86_64__) + test__skip(); + return; +#endif + skel = read_vsyscall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load")) + return; + + skel->bss->target_pid = getpid(); + err = read_vsyscall__attach(skel); + if (!ASSERT_EQ(err, 0, "read_vsyscall attach")) + goto out; + + /* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE, + * but it doesn't affect the returned error codes. + */ + skel->bss->user_ptr = (void *)VSYSCALL_ADDR; + usleep(1); + + for (i = 0; i < ARRAY_SIZE(all_read); i++) + ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name); +out: + read_vsyscall__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 760ad96b4be0..d66687f1ee6a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -4,10 +4,29 @@ #include "timer.skel.h" #include "timer_failure.skel.h" +#define NUM_THR 8 + +static void *spin_lock_thread(void *arg) +{ + int i, err, prog_fd = *(int *)arg; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + for (i = 0; i < 10000; i++) { + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err") || + !ASSERT_OK(topts.retval, "test_run_opts retval")) + break; + } + + pthread_exit(arg); +} + static int timer(struct timer *timer_skel) { - int err, prog_fd; + int i, err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, topts); + pthread_t thread_id[NUM_THR]; + void *ret; err = timer__attach(timer_skel); if (!ASSERT_OK(err, "timer_attach")) @@ -43,6 +62,20 @@ static int timer(struct timer *timer_skel) /* check that code paths completed */ ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok"); + prog_fd = bpf_program__fd(timer_skel->progs.race); + for (i = 0; i < NUM_THR; i++) { + err = pthread_create(&thread_id[i], NULL, + &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + break; + } + + while (i) { + err = pthread_join(thread_id[--i], &ret); + if (ASSERT_OK(err, "pthread_join")) + ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join"); + } + return 0; } diff --git a/tools/testing/selftests/bpf/progs/iters_task.c b/tools/testing/selftests/bpf/progs/iters_task.c index c9b4055cd410..e4d53e40ff20 100644 --- a/tools/testing/selftests/bpf/progs/iters_task.c +++ b/tools/testing/selftests/bpf/progs/iters_task.c @@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL"; pid_t target_pid; -int procs_cnt, threads_cnt, proc_threads_cnt; +int procs_cnt, threads_cnt, proc_threads_cnt, invalid_cnt; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; @@ -26,6 +26,16 @@ int iter_task_for_each_sleep(void *ctx) procs_cnt = threads_cnt = proc_threads_cnt = 0; bpf_rcu_read_lock(); + bpf_for_each(task, pos, NULL, ~0U) { + /* Below instructions shouldn't be executed for invalid flags */ + invalid_cnt++; + } + + bpf_for_each(task, pos, NULL, BPF_TASK_ITER_PROC_THREADS) { + /* Below instructions shouldn't be executed for invalid task__nullable */ + invalid_cnt++; + } + bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) if (pos->pid == target_pid) procs_cnt++; diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c new file mode 100644 index 000000000000..986f96687ae1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */ +#include <linux/types.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +int target_pid = 0; +void *user_ptr = 0; +int read_ret[8]; + +char _license[] SEC("license") = "GPL"; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int do_probe_read(void *ctx) +{ + char buf[8]; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr); + read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr); + read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr); + read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr); + read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr); + read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr); + + return 0; +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int do_copy_from_user(void *ctx) +{ + char buf[8]; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr); + read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr, + bpf_get_current_task_btf(), 0); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index 8b946c8188c6..f615da97df26 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -51,7 +51,8 @@ struct { __uint(max_entries, 1); __type(key, int); __type(value, struct elem); -} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"); +} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"), + race_array SEC(".maps"); __u64 bss_data; __u64 abs_data; @@ -390,3 +391,34 @@ int BPF_PROG2(test5, int, a) return 0; } + +static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer *timer) +{ + bpf_timer_start(timer, 1000000, 0); + return 0; +} + +SEC("syscall") +int race(void *ctx) +{ + struct bpf_timer *timer; + int err, race_key = 0; + struct elem init; + + __builtin_memset(&init, 0, sizeof(struct elem)); + bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY); + + timer = bpf_map_lookup_elem(&race_array, &race_key); + if (!timer) + return 1; + + err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC); + if (err && err != -EBUSY) + return 1; + + bpf_timer_set_callback(timer, race_timer_callback); + bpf_timer_start(timer, 0, 0); + bpf_timer_cancel(timer); + + return 0; +} diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index fe59ca3e5596..12491850ae98 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -367,14 +367,12 @@ run_test() local desc=$2 local node_src=$3 local node_dst=$4 - local ip6_src=$5 - local ip6_dst=$6 - local if_dst=$7 - local trace_type=$8 - local ioam_ns=$9 - - ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \ - $trace_type $ioam_ns & + local ip6_dst=$5 + local trace_type=$6 + local ioam_ns=$7 + local type=$8 + + ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type & local spid=$! sleep 0.1 @@ -489,7 +487,7 @@ out_undef_ns() trace prealloc type 0x800000 ns 0 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0x800000 0 + db01::1 0x800000 0 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -509,7 +507,7 @@ out_no_room() trace prealloc type 0xc00000 ns 123 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -543,14 +541,14 @@ out_bits() if [ $cmd_res != 0 ] then npassed=$((npassed+1)) - log_test_passed "$descr" + log_test_passed "$descr ($1 mode)" else nfailed=$((nfailed+1)) - log_test_failed "$descr" + log_test_failed "$descr ($1 mode)" fi else run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 fi done @@ -574,7 +572,7 @@ out_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 100 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xfff002 123 + db01::1 0xfff002 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -604,7 +602,7 @@ in_undef_ns() trace prealloc type 0x800000 ns 0 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0x800000 0 + db01::1 0x800000 0 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -624,7 +622,7 @@ in_no_room() trace prealloc type 0xc00000 ns 123 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -651,7 +649,7 @@ in_bits() dev veth0 run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 done [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down @@ -679,7 +677,7 @@ in_oflag() trace prealloc type 0xc00000 ns 123 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down @@ -703,7 +701,7 @@ in_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 80 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xfff002 123 + db01::1 0xfff002 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -731,7 +729,7 @@ fwd_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ - db01::2 db02::2 veth0 0xfff002 123 + db02::2 0xfff002 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down } diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index d9d1d4190126..895e5bb5044b 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -8,7 +8,6 @@ #include <errno.h> #include <limits.h> #include <linux/const.h> -#include <linux/if_ether.h> #include <linux/ioam6.h> #include <linux/ipv6.h> #include <stdlib.h> @@ -512,14 +511,6 @@ static int str2id(const char *tname) return -1; } -static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) -{ - return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | - (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | - (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | - (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; -} - static int get_u32(__u32 *val, const char *arg, int base) { unsigned long res; @@ -603,70 +594,80 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { int main(int argc, char **argv) { - int fd, size, hoplen, tid, ret = 1; - struct in6_addr src, dst; + int fd, size, hoplen, tid, ret = 1, on = 1; struct ioam6_hdr *opt; - struct ipv6hdr *ip6h; - __u8 buffer[400], *p; - __u16 ioam_ns; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; + __u8 buffer[512]; __u32 tr_type; + __u16 ioam_ns; + __u8 *ptr; - if (argc != 7) + if (argc != 5) goto out; - tid = str2id(argv[2]); + tid = str2id(argv[1]); if (tid < 0 || !func[tid]) goto out; - if (inet_pton(AF_INET6, argv[3], &src) != 1 || - inet_pton(AF_INET6, argv[4], &dst) != 1) + if (get_u32(&tr_type, argv[2], 16) || + get_u16(&ioam_ns, argv[3], 0)) goto out; - if (get_u32(&tr_type, argv[5], 16) || - get_u16(&ioam_ns, argv[6], 0)) + fd = socket(PF_INET6, SOCK_RAW, + !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + if (fd < 0) goto out; - fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); - if (!fd) - goto out; + setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on)); - if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, - argv[1], strlen(argv[1]))) + iov.iov_len = 1; + iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer))); + if (!iov.iov_base) goto close; - recv: - size = recv(fd, buffer, sizeof(buffer), 0); + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = buffer; + msg.msg_controllen = CMSG_SPACE(sizeof(buffer)); + + size = recvmsg(fd, &msg, 0); if (size <= 0) goto close; - ip6h = (struct ipv6hdr *)buffer; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != IPPROTO_IPV6 || + cmsg->cmsg_type != IPV6_HOPOPTS || + cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr)) + continue; - if (!ipv6_addr_equal(&ip6h->saddr, &src) || - !ipv6_addr_equal(&ip6h->daddr, &dst)) - goto recv; + ptr = (__u8 *)CMSG_DATA(cmsg); - if (ip6h->nexthdr != IPPROTO_HOPOPTS) - goto close; + hoplen = (ptr[1] + 1) << 3; + ptr += sizeof(struct ipv6_hopopt_hdr); - p = buffer + sizeof(*ip6h); - hoplen = (p[1] + 1) << 3; - p += sizeof(struct ipv6_hopopt_hdr); + while (hoplen > 0) { + opt = (struct ioam6_hdr *)ptr; - while (hoplen > 0) { - opt = (struct ioam6_hdr *)p; + if (opt->opt_type == IPV6_TLV_IOAM && + opt->type == IOAM6_TYPE_PREALLOC) { + ptr += sizeof(*opt); + ret = func[tid](tid, + (struct ioam6_trace_hdr *)ptr, + tr_type, ioam_ns); + goto close; + } - if (opt->opt_type == IPV6_TLV_IOAM && - opt->type == IOAM6_TYPE_PREALLOC) { - p += sizeof(*opt); - ret = func[tid](tid, (struct ioam6_trace_hdr *)p, - tr_type, ioam_ns); - break; + ptr += opt->opt_len + 2; + hoplen -= opt->opt_len + 2; } - - p += opt->opt_len + 2; - hoplen -= opt->opt_len + 2; } + + goto recv; close: + free(iov.iov_base); close(fd); out: return ret; diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 49c84602707f..b95c249f81c2 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1485,6 +1485,51 @@ TEST_F(tls, control_msg) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls, control_msg_nomerge) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec2, send_len), 0); +} + +TEST_F(tls, data_control_data) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + char *rec3 = "3333"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); +} + TEST_F(tls, shutdown) { char const *test_str = "test_read"; |