From 0dbffbb5335a1e3aa6855e4ee317e25e669dd302 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Jun 2021 07:12:45 -0700 Subject: net: annotate data race around sk_ll_usec sk_ll_usec is read locklessly from sk_can_busy_loop() while another thread can change its value in sock_setsockopt() This is correct but needs annotations. BUG: KCSAN: data-race in __skb_try_recv_datagram / sock_setsockopt write to 0xffff88814eb5f904 of 4 bytes by task 14011 on cpu 0: sock_setsockopt+0x1287/0x2090 net/core/sock.c:1175 __sys_setsockopt+0x14f/0x200 net/socket.c:2100 __do_sys_setsockopt net/socket.c:2115 [inline] __se_sys_setsockopt net/socket.c:2112 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2112 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff88814eb5f904 of 4 bytes by task 14001 on cpu 1: sk_can_busy_loop include/net/busy_poll.h:41 [inline] __skb_try_recv_datagram+0x14f/0x320 net/core/datagram.c:273 unix_dgram_recvmsg+0x14c/0x870 net/unix/af_unix.c:2101 unix_seqpacket_recvmsg+0x5a/0x70 net/unix/af_unix.c:2067 ____sys_recvmsg+0x15d/0x310 include/linux/uio.h:244 ___sys_recvmsg net/socket.c:2598 [inline] do_recvmmsg+0x35c/0x9f0 net/socket.c:2692 __sys_recvmmsg net/socket.c:2771 [inline] __do_sys_recvmmsg net/socket.c:2794 [inline] __se_sys_recvmmsg net/socket.c:2787 [inline] __x64_sys_recvmmsg+0xcf/0x150 net/socket.c:2787 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00000000 -> 0x00000101 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 14001 Comm: syz-executor.3 Not tainted 5.13.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/busy_poll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 73af4a64a599..40296ed976a9 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -38,7 +38,7 @@ static inline bool net_busy_loop_on(void) static inline bool sk_can_busy_loop(const struct sock *sk) { - return sk->sk_ll_usec && !signal_pending(current); + return READ_ONCE(sk->sk_ll_usec) && !signal_pending(current); } bool sk_busy_loop_end(void *p, unsigned long start_time); -- cgit v1.2.3-59-g8ed1b From 1d11fa231cabeae09a95cb3e4cf1d9dd34e00f08 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 29 Jun 2021 23:34:08 -0400 Subject: sctp: move 198 addresses from unusable to private scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The doc draft-stewart-tsvwg-sctp-ipv4-00 that restricts 198 addresses was never published. These addresses as private addresses should be allowed to use in SCTP. As Michael Tuexen suggested, this patch is to move 198 addresses from unusable to private scope. Reported-by: Sérgio Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 +--- net/sctp/protocol.c | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 265fffa33dad..5859e0a16a58 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -360,8 +360,7 @@ enum { #define SCTP_SCOPE_POLICY_MAX SCTP_SCOPE_POLICY_LINK /* Based on IPv4 scoping , - * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24, - * 192.88.99.0/24. + * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 192.88.99.0/24. * Also, RFC 8.4, non-unicast addresses are not considered valid SCTP * addresses. */ @@ -369,7 +368,6 @@ enum { ((htonl(INADDR_BROADCAST) == a) || \ ipv4_is_multicast(a) || \ ipv4_is_zeronet(a) || \ - ipv4_is_test_198(a) || \ ipv4_is_anycast_6to4(a)) /* Flags used for the bind address copy functions. */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3c1fbf38f4f7..ec0f52567c16 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -398,7 +398,8 @@ static enum sctp_scope sctp_v4_scope(union sctp_addr *addr) retval = SCTP_SCOPE_LINK; } else if (ipv4_is_private_10(addr->v4.sin_addr.s_addr) || ipv4_is_private_172(addr->v4.sin_addr.s_addr) || - ipv4_is_private_192(addr->v4.sin_addr.s_addr)) { + ipv4_is_private_192(addr->v4.sin_addr.s_addr) || + ipv4_is_test_198(addr->v4.sin_addr.s_addr)) { retval = SCTP_SCOPE_PRIVATE; } else { retval = SCTP_SCOPE_GLOBAL; -- cgit v1.2.3-59-g8ed1b From 5d43f951b1ac797450bb4d230fdc960b739bea04 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:52 +0800 Subject: ptp: add ptp virtual clock driver framework This patch is to add ptp virtual clock driver framework utilizing timecounter/cyclecounter. The patch just exports two essential APIs for PTP driver. - ptp_vclock_register() - ptp_vclock_unregister() Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/Makefile | 2 +- drivers/ptp/ptp_private.h | 15 ++++ drivers/ptp/ptp_vclock.c | 150 +++++++++++++++++++++++++++++++++++++++ include/linux/ptp_clock_kernel.h | 4 +- 4 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 drivers/ptp/ptp_vclock.c (limited to 'include') diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile index 8673d1743faa..28a6fe342d3e 100644 --- a/drivers/ptp/Makefile +++ b/drivers/ptp/Makefile @@ -3,7 +3,7 @@ # Makefile for PTP 1588 clock support. # -ptp-y := ptp_clock.o ptp_chardev.o ptp_sysfs.o +ptp-y := ptp_clock.o ptp_chardev.o ptp_sysfs.o ptp_vclock.o ptp_kvm-$(CONFIG_X86) := ptp_kvm_x86.o ptp_kvm_common.o ptp_kvm-$(CONFIG_HAVE_ARM_SMCCC) := ptp_kvm_arm.o ptp_kvm_common.o obj-$(CONFIG_PTP_1588_CLOCK) += ptp.o diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 6b97155148f1..853b79b6b30e 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -48,6 +48,19 @@ struct ptp_clock { struct kthread_delayed_work aux_work; }; +#define info_to_vclock(d) container_of((d), struct ptp_vclock, info) +#define cc_to_vclock(d) container_of((d), struct ptp_vclock, cc) +#define dw_to_vclock(d) container_of((d), struct ptp_vclock, refresh_work) + +struct ptp_vclock { + struct ptp_clock *pclock; + struct ptp_clock_info info; + struct ptp_clock *clock; + struct cyclecounter cc; + struct timecounter tc; + spinlock_t lock; /* protects tc/cc */ +}; + /* * The function queue_cnt() is safe for readers to call without * holding q->lock. Readers use this function to verify that the queue @@ -89,4 +102,6 @@ extern const struct attribute_group *ptp_groups[]; int ptp_populate_pin_groups(struct ptp_clock *ptp); void ptp_cleanup_pin_groups(struct ptp_clock *ptp); +struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock); +void ptp_vclock_unregister(struct ptp_vclock *vclock); #endif diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c new file mode 100644 index 000000000000..fc9205cc504d --- /dev/null +++ b/drivers/ptp/ptp_vclock.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PTP virtual clock driver + * + * Copyright 2021 NXP + */ +#include +#include "ptp_private.h" + +#define PTP_VCLOCK_CC_SHIFT 31 +#define PTP_VCLOCK_CC_MULT (1 << PTP_VCLOCK_CC_SHIFT) +#define PTP_VCLOCK_FADJ_SHIFT 9 +#define PTP_VCLOCK_FADJ_DENOMINATOR 15625ULL +#define PTP_VCLOCK_REFRESH_INTERVAL (HZ * 2) + +static int ptp_vclock_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + unsigned long flags; + s64 adj; + + adj = (s64)scaled_ppm << PTP_VCLOCK_FADJ_SHIFT; + adj = div_s64(adj, PTP_VCLOCK_FADJ_DENOMINATOR); + + spin_lock_irqsave(&vclock->lock, flags); + timecounter_read(&vclock->tc); + vclock->cc.mult = PTP_VCLOCK_CC_MULT + adj; + spin_unlock_irqrestore(&vclock->lock, flags); + + return 0; +} + +static int ptp_vclock_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + unsigned long flags; + + spin_lock_irqsave(&vclock->lock, flags); + timecounter_adjtime(&vclock->tc, delta); + spin_unlock_irqrestore(&vclock->lock, flags); + + return 0; +} + +static int ptp_vclock_gettime(struct ptp_clock_info *ptp, + struct timespec64 *ts) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + unsigned long flags; + u64 ns; + + spin_lock_irqsave(&vclock->lock, flags); + ns = timecounter_read(&vclock->tc); + spin_unlock_irqrestore(&vclock->lock, flags); + *ts = ns_to_timespec64(ns); + + return 0; +} + +static int ptp_vclock_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + u64 ns = timespec64_to_ns(ts); + unsigned long flags; + + spin_lock_irqsave(&vclock->lock, flags); + timecounter_init(&vclock->tc, &vclock->cc, ns); + spin_unlock_irqrestore(&vclock->lock, flags); + + return 0; +} + +static long ptp_vclock_refresh(struct ptp_clock_info *ptp) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + struct timespec64 ts; + + ptp_vclock_gettime(&vclock->info, &ts); + + return PTP_VCLOCK_REFRESH_INTERVAL; +} + +static const struct ptp_clock_info ptp_vclock_info = { + .owner = THIS_MODULE, + .name = "ptp virtual clock", + /* The maximum ppb value that long scaled_ppm can support */ + .max_adj = 32767999, + .adjfine = ptp_vclock_adjfine, + .adjtime = ptp_vclock_adjtime, + .gettime64 = ptp_vclock_gettime, + .settime64 = ptp_vclock_settime, + .do_aux_work = ptp_vclock_refresh, +}; + +static u64 ptp_vclock_read(const struct cyclecounter *cc) +{ + struct ptp_vclock *vclock = cc_to_vclock(cc); + struct ptp_clock *ptp = vclock->pclock; + struct timespec64 ts = {}; + + if (ptp->info->gettimex64) + ptp->info->gettimex64(ptp->info, &ts, NULL); + else + ptp->info->gettime64(ptp->info, &ts); + + return timespec64_to_ns(&ts); +} + +static const struct cyclecounter ptp_vclock_cc = { + .read = ptp_vclock_read, + .mask = CYCLECOUNTER_MASK(32), + .mult = PTP_VCLOCK_CC_MULT, + .shift = PTP_VCLOCK_CC_SHIFT, +}; + +struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock) +{ + struct ptp_vclock *vclock; + + vclock = kzalloc(sizeof(*vclock), GFP_KERNEL); + if (!vclock) + return NULL; + + vclock->pclock = pclock; + vclock->info = ptp_vclock_info; + vclock->cc = ptp_vclock_cc; + + snprintf(vclock->info.name, PTP_CLOCK_NAME_LEN, "ptp%d_virt", + pclock->index); + + spin_lock_init(&vclock->lock); + + vclock->clock = ptp_clock_register(&vclock->info, &pclock->dev); + if (IS_ERR_OR_NULL(vclock->clock)) { + kfree(vclock); + return NULL; + } + + timecounter_init(&vclock->tc, &vclock->cc, 0); + ptp_schedule_worker(vclock->clock, PTP_VCLOCK_REFRESH_INTERVAL); + + return vclock; +} + +void ptp_vclock_unregister(struct ptp_vclock *vclock) +{ + ptp_clock_unregister(vclock->clock); + kfree(vclock); +} diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index aba237c0b3a2..b6fb771ee524 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -11,7 +11,9 @@ #include #include #include +#include +#define PTP_CLOCK_NAME_LEN 32 /** * struct ptp_clock_request - request PTP clock event * @@ -134,7 +136,7 @@ struct ptp_system_timestamp { struct ptp_clock_info { struct module *owner; - char name[16]; + char name[PTP_CLOCK_NAME_LEN]; s32 max_adj; int n_alarm; int n_ext_ts; -- cgit v1.2.3-59-g8ed1b From acb288e8047b7569fbc9af6fa6e9405315345103 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:55 +0800 Subject: ptp: add kernel API ptp_get_vclocks_index() Add kernel API ptp_get_vclocks_index() to get all ptp vclocks index on pclock. This is preparation for supporting ptp vclocks info query through ethtool. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 3 ++- drivers/ptp/ptp_private.h | 2 ++ drivers/ptp/ptp_vclock.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/ptp_clock_kernel.h | 14 ++++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 9205a9362a9d..f012fa581cf4 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -24,10 +24,11 @@ #define PTP_PPS_EVENT PPS_CAPTUREASSERT #define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC) +struct class *ptp_class; + /* private globals */ static dev_t ptp_devt; -static struct class *ptp_class; static DEFINE_IDA(ptp_clocks_map); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index f75fadd9b244..dba6be477067 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -96,6 +96,8 @@ static inline bool ptp_vclock_in_use(struct ptp_clock *ptp) return in_use; } +extern struct class *ptp_class; + /* * see ptp_chardev.c */ diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index fc9205cc504d..cefab29a0592 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -148,3 +148,38 @@ void ptp_vclock_unregister(struct ptp_vclock *vclock) ptp_clock_unregister(vclock->clock); kfree(vclock); } + +int ptp_get_vclocks_index(int pclock_index, int **vclock_index) +{ + char name[PTP_CLOCK_NAME_LEN] = ""; + struct ptp_clock *ptp; + struct device *dev; + int num = 0; + + if (pclock_index < 0) + return num; + + snprintf(name, PTP_CLOCK_NAME_LEN, "ptp%d", pclock_index); + dev = class_find_device_by_name(ptp_class, name); + if (!dev) + return num; + + ptp = dev_get_drvdata(dev); + + if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) { + put_device(dev); + return num; + } + + *vclock_index = kzalloc(sizeof(int) * ptp->n_vclocks, GFP_KERNEL); + if (!(*vclock_index)) + goto out; + + memcpy(*vclock_index, ptp->vclock_index, sizeof(int) * ptp->n_vclocks); + num = ptp->n_vclocks; +out: + mutex_unlock(&ptp->n_vclocks_mux); + put_device(dev); + return num; +} +EXPORT_SYMBOL(ptp_get_vclocks_index); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index b6fb771ee524..300a984fec87 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -306,6 +306,18 @@ int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay); */ void ptp_cancel_worker_sync(struct ptp_clock *ptp); +/** + * ptp_get_vclocks_index() - get all vclocks index on pclock, and + * caller is responsible to free memory + * of vclock_index + * + * @pclock_index: phc index of ptp pclock. + * @vclock_index: pointer to pointer of vclock index. + * + * return number of vclocks. + */ +int ptp_get_vclocks_index(int pclock_index, int **vclock_index); + #else static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, struct device *parent) @@ -325,6 +337,8 @@ static inline int ptp_schedule_worker(struct ptp_clock *ptp, { return -EOPNOTSUPP; } static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp) { } +static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) +{ return 0; } #endif -- cgit v1.2.3-59-g8ed1b From c156174a67070042d51d2c866146d3c934d5468c Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:56 +0800 Subject: ethtool: add a new command for getting PHC virtual clocks Add an interface for getting PHC (PTP Hardware Clock) virtual clocks, which are based on PHC physical clock providing hardware timestamp to network packets. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 22 +++++++ include/linux/ethtool.h | 10 +++ include/uapi/linux/ethtool_netlink.h | 15 +++++ net/ethtool/Makefile | 2 +- net/ethtool/common.c | 13 ++++ net/ethtool/netlink.c | 10 +++ net/ethtool/netlink.h | 2 + net/ethtool/phc_vclocks.c | 94 ++++++++++++++++++++++++++++ 8 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 net/ethtool/phc_vclocks.c (limited to 'include') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 6ea91e41593f..c86628e6a235 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -212,6 +212,7 @@ Userspace to kernel: ``ETHTOOL_MSG_FEC_SET`` set FEC settings ``ETHTOOL_MSG_MODULE_EEPROM_GET`` read SFP module EEPROM ``ETHTOOL_MSG_STATS_GET`` get standard statistics + ``ETHTOOL_MSG_PHC_VCLOCKS_GET`` get PHC virtual clocks info ===================================== ================================ Kernel to userspace: @@ -250,6 +251,7 @@ Kernel to userspace: ``ETHTOOL_MSG_FEC_NTF`` FEC settings ``ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY`` read SFP module EEPROM ``ETHTOOL_MSG_STATS_GET_REPLY`` standard statistics + ``ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY`` PHC virtual clocks info ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -1477,6 +1479,25 @@ Low and high bounds are inclusive, for example: etherStatsPkts512to1023Octets 512 1023 ============================= ==== ==== +PHC_VCLOCKS_GET +=============== + +Query device PHC virtual clocks information. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PHC_VCLOCKS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PHC_VCLOCKS_HEADER`` nested reply header + ``ETHTOOL_A_PHC_VCLOCKS_NUM`` u32 PHC virtual clocks number + ``ETHTOOL_A_PHC_VCLOCKS_INDEX`` s32 PHC index array + ==================================== ====== ========================== + Request translation =================== @@ -1575,4 +1596,5 @@ are netlink only. n/a ``ETHTOOL_MSG_CABLE_TEST_ACT`` n/a ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` n/a ``ETHTOOL_MSG_TUNNEL_INFO_GET`` + n/a ``ETHTOOL_MSG_PHC_VCLOCKS_GET`` =================================== ===================================== diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 29dbb603bc91..232daaec56e4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -757,6 +757,16 @@ void ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, enum ethtool_link_mode_bit_indices link_mode); +/** + * ethtool_get_phc_vclocks - Derive phc vclocks information, and caller + * is responsible to free memory of vclock_index + * @dev: pointer to net_device structure + * @vclock_index: pointer to pointer of vclock index + * + * Return number of phc vclocks + */ +int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); + /** * ethtool_sprintf - Write formatted string to ethtool string data * @data: Pointer to start of string to update diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index c7135c9c37a5..b3b93710eff7 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -46,6 +46,7 @@ enum { ETHTOOL_MSG_FEC_SET, ETHTOOL_MSG_MODULE_EEPROM_GET, ETHTOOL_MSG_STATS_GET, + ETHTOOL_MSG_PHC_VCLOCKS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -88,6 +89,7 @@ enum { ETHTOOL_MSG_FEC_NTF, ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, ETHTOOL_MSG_STATS_GET_REPLY, + ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -440,6 +442,19 @@ enum { ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) }; +/* PHC VCLOCKS */ + +enum { + ETHTOOL_A_PHC_VCLOCKS_UNSPEC, + ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ + ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ + + /* add new constants above here */ + __ETHTOOL_A_PHC_VCLOCKS_CNT, + ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) +}; + /* CABLE TEST */ enum { diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 723c9a8a8cdf..0a19470efbfb 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ - tunnels.o fec.o eeprom.o stats.o + tunnels.o fec.o eeprom.o stats.o phc_vclocks.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c index f9dcbad84788..798231b07676 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "common.h" @@ -554,6 +555,18 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) return 0; } +int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) +{ + struct ethtool_ts_info info = { }; + int num = 0; + + if (!__ethtool_get_ts_info(dev, &info)) + num = ptp_get_vclocks_index(info.phc_index, vclock_index); + + return num; +} +EXPORT_SYMBOL(ethtool_get_phc_vclocks); + const struct ethtool_phy_ops *ethtool_phy_ops; void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index a7346346114f..73e0f5b626bf 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -248,6 +248,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_TSINFO_GET] = ðnl_tsinfo_request_ops, [ETHTOOL_MSG_MODULE_EEPROM_GET] = ðnl_module_eeprom_request_ops, [ETHTOOL_MSG_STATS_GET] = ðnl_stats_request_ops, + [ETHTOOL_MSG_PHC_VCLOCKS_GET] = ðnl_phc_vclocks_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -958,6 +959,15 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_stats_get_policy, .maxattr = ARRAY_SIZE(ethnl_stats_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_phc_vclocks_get_policy, + .maxattr = ARRAY_SIZE(ethnl_phc_vclocks_get_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 3e25a47fd482..3fc395c86702 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -347,6 +347,7 @@ extern const struct ethnl_request_ops ethnl_tsinfo_request_ops; extern const struct ethnl_request_ops ethnl_fec_request_ops; extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops; extern const struct ethnl_request_ops ethnl_stats_request_ops; +extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -382,6 +383,7 @@ extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1]; extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1]; extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1]; extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1]; +extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1]; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/phc_vclocks.c b/net/ethtool/phc_vclocks.c new file mode 100644 index 000000000000..637b2f5297d5 --- /dev/null +++ b/net/ethtool/phc_vclocks.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021 NXP + */ +#include "netlink.h" +#include "common.h" + +struct phc_vclocks_req_info { + struct ethnl_req_info base; +}; + +struct phc_vclocks_reply_data { + struct ethnl_reply_data base; + int num; + int *index; +}; + +#define PHC_VCLOCKS_REPDATA(__reply_base) \ + container_of(__reply_base, struct phc_vclocks_reply_data, base) + +const struct nla_policy ethnl_phc_vclocks_get_policy[] = { + [ETHTOOL_A_PHC_VCLOCKS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static int phc_vclocks_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + int ret; + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + data->num = ethtool_get_phc_vclocks(dev, &data->index); + ethnl_ops_complete(dev); + + return ret; +} + +static int phc_vclocks_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct phc_vclocks_reply_data *data = + PHC_VCLOCKS_REPDATA(reply_base); + int len = 0; + + if (data->num > 0) { + len += nla_total_size(sizeof(u32)); + len += nla_total_size(sizeof(s32) * data->num); + } + + return len; +} + +static int phc_vclocks_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct phc_vclocks_reply_data *data = + PHC_VCLOCKS_REPDATA(reply_base); + + if (data->num <= 0) + return 0; + + if (nla_put_u32(skb, ETHTOOL_A_PHC_VCLOCKS_NUM, data->num) || + nla_put(skb, ETHTOOL_A_PHC_VCLOCKS_INDEX, + sizeof(s32) * data->num, data->index)) + return -EMSGSIZE; + + return 0; +} + +static void phc_vclocks_cleanup_data(struct ethnl_reply_data *reply_base) +{ + const struct phc_vclocks_reply_data *data = + PHC_VCLOCKS_REPDATA(reply_base); + + kfree(data->index); +} + +const struct ethnl_request_ops ethnl_phc_vclocks_request_ops = { + .request_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET, + .reply_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, + .hdr_attr = ETHTOOL_A_PHC_VCLOCKS_HEADER, + .req_info_size = sizeof(struct phc_vclocks_req_info), + .reply_data_size = sizeof(struct phc_vclocks_reply_data), + + .prepare_data = phc_vclocks_prepare_data, + .reply_size = phc_vclocks_reply_size, + .fill_reply = phc_vclocks_fill_reply, + .cleanup_data = phc_vclocks_cleanup_data, +}; -- cgit v1.2.3-59-g8ed1b From 895487a3a10fb3a177e20dcde875515d46ccd4df Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:57 +0800 Subject: ptp: add kernel API ptp_convert_timestamp() Add kernel API ptp_convert_timestamp() to convert raw hardware timestamp to a specified ptp vclock time. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_vclock.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/ptp_clock_kernel.h | 13 +++++++++++++ 2 files changed, 47 insertions(+) (limited to 'include') diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index cefab29a0592..e0f87c57749a 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -183,3 +183,37 @@ out: return num; } EXPORT_SYMBOL(ptp_get_vclocks_index); + +void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, + int vclock_index) +{ + char name[PTP_CLOCK_NAME_LEN] = ""; + struct ptp_vclock *vclock; + struct ptp_clock *ptp; + unsigned long flags; + struct device *dev; + u64 ns; + + snprintf(name, PTP_CLOCK_NAME_LEN, "ptp%d", vclock_index); + dev = class_find_device_by_name(ptp_class, name); + if (!dev) + return; + + ptp = dev_get_drvdata(dev); + if (!ptp->is_virtual_clock) { + put_device(dev); + return; + } + + vclock = info_to_vclock(ptp->info); + + ns = ktime_to_ns(hwtstamps->hwtstamp); + + spin_lock_irqsave(&vclock->lock, flags); + ns = timecounter_cyc2time(&vclock->tc, ns); + spin_unlock_irqrestore(&vclock->lock, flags); + + put_device(dev); + hwtstamps->hwtstamp = ns_to_ktime(ns); +} +EXPORT_SYMBOL(ptp_convert_timestamp); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 300a984fec87..71fac9237725 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -12,6 +12,7 @@ #include #include #include +#include #define PTP_CLOCK_NAME_LEN 32 /** @@ -318,6 +319,15 @@ void ptp_cancel_worker_sync(struct ptp_clock *ptp); */ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); +/** + * ptp_convert_timestamp() - convert timestamp to a ptp vclock time + * + * @hwtstamps: skb_shared_hwtstamps structure pointer + * @vclock_index: phc index of ptp vclock. + */ +void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, + int vclock_index); + #else static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, struct device *parent) @@ -339,6 +349,9 @@ static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp) { } static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) { return 0; } +static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, + int vclock_index) +{ } #endif -- cgit v1.2.3-59-g8ed1b From d463126e23f112629edb01594141ca437a92a108 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:59 +0800 Subject: net: sock: extend SO_TIMESTAMPING for PHC binding Since PTP virtual clock support is added, there can be several PTP virtual clocks based on one PTP physical clock for timestamping. This patch is to extend SO_TIMESTAMPING API to support PHC (PTP Hardware Clock) binding by adding a new flag SOF_TIMESTAMPING_BIND_PHC. When PTP virtual clocks are in use, user space can configure to bind one for timestamping, but PTP physical clock is not supported and not needed to bind. This patch is preparation for timestamp conversion from raw timestamp to a specific PTP virtual clock time in core net. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- include/net/sock.h | 8 +++-- include/uapi/linux/net_tstamp.h | 17 +++++++++-- net/core/sock.c | 65 +++++++++++++++++++++++++++++++++++++++-- net/ethtool/common.c | 1 + net/mptcp/sockopt.c | 23 +++++++++++---- 5 files changed, 101 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 8bdd80027ffb..f23cb259b0e2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -316,7 +316,9 @@ struct bpf_local_storage; * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only - * @sk_tsflags: SO_TIMESTAMPING socket options + * @sk_tsflags: SO_TIMESTAMPING flags + * @sk_bind_phc: SO_TIMESTAMPING bind PHC index of PTP virtual clock + * for timestamping * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals @@ -493,6 +495,7 @@ struct sock { seqlock_t sk_stamp_seq; #endif u16 sk_tsflags; + int sk_bind_phc; u8 sk_shutdown; u32 sk_tskey; atomic_t sk_zckey; @@ -2755,7 +2758,8 @@ void sock_def_readable(struct sock *sk); int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk); void sock_set_timestamp(struct sock *sk, int optname, bool valbool); -int sock_set_timestamping(struct sock *sk, int optname, int val); +int sock_set_timestamping(struct sock *sk, int optname, + struct so_timestamping timestamping); void sock_enable_timestamps(struct sock *sk); void sock_no_linger(struct sock *sk); diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 7ed0b3d1c00a..fcc61c73a666 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,7 +13,7 @@ #include #include /* for SO_TIMESTAMPING */ -/* SO_TIMESTAMPING gets an integer bit field comprised of these values */ +/* SO_TIMESTAMPING flags */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), SOF_TIMESTAMPING_TX_SOFTWARE = (1<<1), @@ -30,8 +30,9 @@ enum { SOF_TIMESTAMPING_OPT_STATS = (1<<12), SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13), SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), + SOF_TIMESTAMPING_BIND_PHC = (1 << 15), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_BIND_PHC, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; @@ -46,6 +47,18 @@ enum { SOF_TIMESTAMPING_TX_SCHED | \ SOF_TIMESTAMPING_TX_ACK) +/** + * struct so_timestamping - SO_TIMESTAMPING parameter + * + * @flags: SO_TIMESTAMPING flags + * @bind_phc: Index of PTP virtual clock bound to sock. This is available + * if flag SOF_TIMESTAMPING_BIND_PHC is set. + */ +struct so_timestamping { + int flags; + int bind_phc; +}; + /** * struct hwtstamp_config - %SIOCGHWTSTAMP and %SIOCSHWTSTAMP parameter * diff --git a/net/core/sock.c b/net/core/sock.c index dd9599656c40..cad107112204 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,6 +139,8 @@ #include #include +#include + static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); @@ -810,8 +812,47 @@ void sock_set_timestamp(struct sock *sk, int optname, bool valbool) } } -int sock_set_timestamping(struct sock *sk, int optname, int val) +static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) { + struct net *net = sock_net(sk); + struct net_device *dev = NULL; + bool match = false; + int *vclock_index; + int i, num; + + if (sk->sk_bound_dev_if) + dev = dev_get_by_index(net, sk->sk_bound_dev_if); + + if (!dev) { + pr_err("%s: sock not bind to device\n", __func__); + return -EOPNOTSUPP; + } + + num = ethtool_get_phc_vclocks(dev, &vclock_index); + for (i = 0; i < num; i++) { + if (*(vclock_index + i) == phc_index) { + match = true; + break; + } + } + + if (num > 0) + kfree(vclock_index); + + if (!match) + return -EINVAL; + + sk->sk_bind_phc = phc_index; + + return 0; +} + +int sock_set_timestamping(struct sock *sk, int optname, + struct so_timestamping timestamping) +{ + int val = timestamping.flags; + int ret; + if (val & ~SOF_TIMESTAMPING_MASK) return -EINVAL; @@ -832,6 +873,12 @@ int sock_set_timestamping(struct sock *sk, int optname, int val) !(val & SOF_TIMESTAMPING_OPT_TSONLY)) return -EINVAL; + if (val & SOF_TIMESTAMPING_BIND_PHC) { + ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc); + if (ret) + return ret; + } + sk->sk_tsflags = val; sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); @@ -907,6 +954,7 @@ EXPORT_SYMBOL(sock_set_mark); int sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { + struct so_timestamping timestamping; struct sock_txtime sk_txtime; struct sock *sk = sock->sk; int val; @@ -1073,7 +1121,15 @@ set_sndbuf: case SO_TIMESTAMPING_NEW: case SO_TIMESTAMPING_OLD: - ret = sock_set_timestamping(sk, optname, val); + if (optlen == sizeof(timestamping)) { + if (copy_from_sockptr(×tamping, optval, + sizeof(timestamping))) + return -EFAULT; + } else { + memset(×tamping, 0, sizeof(timestamping)); + timestamping.flags = val; + } + ret = sock_set_timestamping(sk, optname, timestamping); break; case SO_RCVLOWAT: @@ -1348,6 +1404,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, struct __kernel_old_timeval tm; struct __kernel_sock_timeval stm; struct sock_txtime txtime; + struct so_timestamping timestamping; } v; int lv = sizeof(int); @@ -1451,7 +1508,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_TIMESTAMPING_OLD: - v.val = sk->sk_tsflags; + lv = sizeof(v.timestamping); + v.timestamping.flags = sk->sk_tsflags; + v.timestamping.bind_phc = sk->sk_bind_phc; break; case SO_RCVTIMEO_OLD: diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 798231b07676..c63e0739dc6a 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -398,6 +398,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = { [const_ilog2(SOF_TIMESTAMPING_OPT_STATS)] = "option-stats", [const_ilog2(SOF_TIMESTAMPING_OPT_PKTINFO)] = "option-pktinfo", [const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw", + [const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc", }; static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index ea38cbcd2ad4..8c03afac5ca0 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -207,14 +207,25 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int val, ret; + struct so_timestamping timestamping; + int ret; - ret = mptcp_get_int_option(msk, optval, optlen, &val); - if (ret) - return ret; + if (optlen == sizeof(timestamping)) { + if (copy_from_sockptr(×tamping, optval, + sizeof(timestamping))) + return -EFAULT; + } else if (optlen == sizeof(int)) { + memset(×tamping, 0, sizeof(timestamping)); + + if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) + return -EFAULT; + } else { + return -EINVAL; + } ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, - KERNEL_SOCKPTR(&val), sizeof(val)); + KERNEL_SOCKPTR(×tamping), + sizeof(timestamping)); if (ret) return ret; @@ -224,7 +235,7 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); - sock_set_timestamping(sk, optname, val); + sock_set_timestamping(sk, optname, timestamping); unlock_sock_fast(ssk, slow); } -- cgit v1.2.3-59-g8ed1b From ca75bcf0a83b6cc7f53a593d98ec7121c4839b43 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Jul 2021 10:15:09 +0200 Subject: net: remove the caif_hsi driver The caif_hsi driver relies on a cfhsi_get_ops symbol using symbol_get, but this symbol is not provided anywhere in the kernel tree. Remove this driver given that it is dead code. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/net/caif/Kconfig | 9 - drivers/net/caif/Makefile | 3 - drivers/net/caif/caif_hsi.c | 1454 ------------------------------------------- include/net/caif/caif_hsi.h | 200 ------ 4 files changed, 1666 deletions(-) delete mode 100644 drivers/net/caif/caif_hsi.c delete mode 100644 include/net/caif/caif_hsi.h (limited to 'include') diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index a77124bc1f4b..709660cb38f8 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -20,15 +20,6 @@ config CAIF_TTY identified as N_CAIF. When this ldisc is opened from user space it will redirect the TTY's traffic into the CAIF stack. -config CAIF_HSI - tristate "CAIF HSI transport driver" - depends on CAIF - default n - help - The CAIF low level driver for CAIF over HSI. - Be aware that if you enable this then you also need to - enable a low-level HSI driver. - config CAIF_VIRTIO tristate "CAIF virtio transport driver" depends on CAIF && HAS_DMA diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile index b1918c8c126c..97f664f8016c 100644 --- a/drivers/net/caif/Makefile +++ b/drivers/net/caif/Makefile @@ -4,8 +4,5 @@ ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG # Serial interface obj-$(CONFIG_CAIF_TTY) += caif_serial.o -# HSI interface -obj-$(CONFIG_CAIF_HSI) += caif_hsi.o - # Virtio interface obj-$(CONFIG_CAIF_VIRTIO) += caif_virtio.o diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c deleted file mode 100644 index 3d63b15bbaa1..000000000000 --- a/drivers/net/caif/caif_hsi.c +++ /dev/null @@ -1,1454 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Daniel Martensson - * Dmitry.Tarnyagin / dmitry.tarnyagin@lockless.no - */ - -#define pr_fmt(fmt) KBUILD_MODNAME fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Daniel Martensson"); -MODULE_DESCRIPTION("CAIF HSI driver"); - -/* Returns the number of padding bytes for alignment. */ -#define PAD_POW2(x, pow) ((((x)&((pow)-1)) == 0) ? 0 :\ - (((pow)-((x)&((pow)-1))))) - -static const struct cfhsi_config hsi_default_config = { - - /* Inactivity timeout on HSI, ms */ - .inactivity_timeout = HZ, - - /* Aggregation timeout (ms) of zero means no aggregation is done*/ - .aggregation_timeout = 1, - - /* - * HSI link layer flow-control thresholds. - * Threshold values for the HSI packet queue. Flow-control will be - * asserted when the number of packets exceeds q_high_mark. It will - * not be de-asserted before the number of packets drops below - * q_low_mark. - * Warning: A high threshold value might increase throughput but it - * will at the same time prevent channel prioritization and increase - * the risk of flooding the modem. The high threshold should be above - * the low. - */ - .q_high_mark = 100, - .q_low_mark = 50, - - /* - * HSI padding options. - * Warning: must be a base of 2 (& operation used) and can not be zero ! - */ - .head_align = 4, - .tail_align = 4, -}; - -#define ON 1 -#define OFF 0 - -static LIST_HEAD(cfhsi_list); - -static void cfhsi_inactivity_tout(struct timer_list *t) -{ - struct cfhsi *cfhsi = from_timer(cfhsi, t, inactivity_timer); - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - /* Schedule power down work queue. */ - if (!test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - queue_work(cfhsi->wq, &cfhsi->wake_down_work); -} - -static void cfhsi_update_aggregation_stats(struct cfhsi *cfhsi, - const struct sk_buff *skb, - int direction) -{ - struct caif_payload_info *info; - int hpad, tpad, len; - - info = (struct caif_payload_info *)&skb->cb; - hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); - tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); - len = skb->len + hpad + tpad; - - if (direction > 0) - cfhsi->aggregation_len += len; - else if (direction < 0) - cfhsi->aggregation_len -= len; -} - -static bool cfhsi_can_send_aggregate(struct cfhsi *cfhsi) -{ - int i; - - if (cfhsi->cfg.aggregation_timeout == 0) - return true; - - for (i = 0; i < CFHSI_PRIO_BEBK; ++i) { - if (cfhsi->qhead[i].qlen) - return true; - } - - /* TODO: Use aggregation_len instead */ - if (cfhsi->qhead[CFHSI_PRIO_BEBK].qlen >= CFHSI_MAX_PKTS) - return true; - - return false; -} - -static struct sk_buff *cfhsi_dequeue(struct cfhsi *cfhsi) -{ - struct sk_buff *skb; - int i; - - for (i = 0; i < CFHSI_PRIO_LAST; ++i) { - skb = skb_dequeue(&cfhsi->qhead[i]); - if (skb) - break; - } - - return skb; -} - -static int cfhsi_tx_queue_len(struct cfhsi *cfhsi) -{ - int i, len = 0; - for (i = 0; i < CFHSI_PRIO_LAST; ++i) - len += skb_queue_len(&cfhsi->qhead[i]); - return len; -} - -static void cfhsi_abort_tx(struct cfhsi *cfhsi) -{ - struct sk_buff *skb; - - for (;;) { - spin_lock_bh(&cfhsi->lock); - skb = cfhsi_dequeue(cfhsi); - if (!skb) - break; - - cfhsi->ndev->stats.tx_errors++; - cfhsi->ndev->stats.tx_dropped++; - cfhsi_update_aggregation_stats(cfhsi, skb, -1); - spin_unlock_bh(&cfhsi->lock); - kfree_skb(skb); - } - cfhsi->tx_state = CFHSI_TX_STATE_IDLE; - if (!test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); -} - -static int cfhsi_flush_fifo(struct cfhsi *cfhsi) -{ - char buffer[32]; /* Any reasonable value */ - size_t fifo_occupancy; - int ret; - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - do { - ret = cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, - &fifo_occupancy); - if (ret) { - netdev_warn(cfhsi->ndev, - "%s: can't get FIFO occupancy: %d.\n", - __func__, ret); - break; - } else if (!fifo_occupancy) - /* No more data, exitting normally */ - break; - - fifo_occupancy = min(sizeof(buffer), fifo_occupancy); - set_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits); - ret = cfhsi->ops->cfhsi_rx(buffer, fifo_occupancy, - cfhsi->ops); - if (ret) { - clear_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits); - netdev_warn(cfhsi->ndev, - "%s: can't read data: %d.\n", - __func__, ret); - break; - } - - ret = 5 * HZ; - ret = wait_event_interruptible_timeout(cfhsi->flush_fifo_wait, - !test_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits), ret); - - if (ret < 0) { - netdev_warn(cfhsi->ndev, - "%s: can't wait for flush complete: %d.\n", - __func__, ret); - break; - } else if (!ret) { - ret = -ETIMEDOUT; - netdev_warn(cfhsi->ndev, - "%s: timeout waiting for flush complete.\n", - __func__); - break; - } - } while (1); - - return ret; -} - -static int cfhsi_tx_frm(struct cfhsi_desc *desc, struct cfhsi *cfhsi) -{ - int nfrms = 0; - int pld_len = 0; - struct sk_buff *skb; - u8 *pfrm = desc->emb_frm + CFHSI_MAX_EMB_FRM_SZ; - - skb = cfhsi_dequeue(cfhsi); - if (!skb) - return 0; - - /* Clear offset. */ - desc->offset = 0; - - /* Check if we can embed a CAIF frame. */ - if (skb->len < CFHSI_MAX_EMB_FRM_SZ) { - struct caif_payload_info *info; - int hpad; - int tpad; - - /* Calculate needed head alignment and tail alignment. */ - info = (struct caif_payload_info *)&skb->cb; - - hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); - tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); - - /* Check if frame still fits with added alignment. */ - if ((skb->len + hpad + tpad) <= CFHSI_MAX_EMB_FRM_SZ) { - u8 *pemb = desc->emb_frm; - desc->offset = CFHSI_DESC_SHORT_SZ; - *pemb = (u8)(hpad - 1); - pemb += hpad; - - /* Update network statistics. */ - spin_lock_bh(&cfhsi->lock); - cfhsi->ndev->stats.tx_packets++; - cfhsi->ndev->stats.tx_bytes += skb->len; - cfhsi_update_aggregation_stats(cfhsi, skb, -1); - spin_unlock_bh(&cfhsi->lock); - - /* Copy in embedded CAIF frame. */ - skb_copy_bits(skb, 0, pemb, skb->len); - - /* Consume the SKB */ - consume_skb(skb); - skb = NULL; - } - } - - /* Create payload CAIF frames. */ - while (nfrms < CFHSI_MAX_PKTS) { - struct caif_payload_info *info; - int hpad; - int tpad; - - if (!skb) - skb = cfhsi_dequeue(cfhsi); - - if (!skb) - break; - - /* Calculate needed head alignment and tail alignment. */ - info = (struct caif_payload_info *)&skb->cb; - - hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); - tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); - - /* Fill in CAIF frame length in descriptor. */ - desc->cffrm_len[nfrms] = hpad + skb->len + tpad; - - /* Fill head padding information. */ - *pfrm = (u8)(hpad - 1); - pfrm += hpad; - - /* Update network statistics. */ - spin_lock_bh(&cfhsi->lock); - cfhsi->ndev->stats.tx_packets++; - cfhsi->ndev->stats.tx_bytes += skb->len; - cfhsi_update_aggregation_stats(cfhsi, skb, -1); - spin_unlock_bh(&cfhsi->lock); - - /* Copy in CAIF frame. */ - skb_copy_bits(skb, 0, pfrm, skb->len); - - /* Update payload length. */ - pld_len += desc->cffrm_len[nfrms]; - - /* Update frame pointer. */ - pfrm += skb->len + tpad; - - /* Consume the SKB */ - consume_skb(skb); - skb = NULL; - - /* Update number of frames. */ - nfrms++; - } - - /* Unused length fields should be zero-filled (according to SPEC). */ - while (nfrms < CFHSI_MAX_PKTS) { - desc->cffrm_len[nfrms] = 0x0000; - nfrms++; - } - - /* Check if we can piggy-back another descriptor. */ - if (cfhsi_can_send_aggregate(cfhsi)) - desc->header |= CFHSI_PIGGY_DESC; - else - desc->header &= ~CFHSI_PIGGY_DESC; - - return CFHSI_DESC_SZ + pld_len; -} - -static void cfhsi_start_tx(struct cfhsi *cfhsi) -{ - struct cfhsi_desc *desc = (struct cfhsi_desc *)cfhsi->tx_buf; - int len, res; - - netdev_dbg(cfhsi->ndev, "%s.\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - do { - /* Create HSI frame. */ - len = cfhsi_tx_frm(desc, cfhsi); - if (!len) { - spin_lock_bh(&cfhsi->lock); - if (unlikely(cfhsi_tx_queue_len(cfhsi))) { - spin_unlock_bh(&cfhsi->lock); - res = -EAGAIN; - continue; - } - cfhsi->tx_state = CFHSI_TX_STATE_IDLE; - /* Start inactivity timer. */ - mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); - break; - } - - /* Set up new transfer. */ - res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); - if (WARN_ON(res < 0)) - netdev_err(cfhsi->ndev, "%s: TX error %d.\n", - __func__, res); - } while (res < 0); -} - -static void cfhsi_tx_done(struct cfhsi *cfhsi) -{ - netdev_dbg(cfhsi->ndev, "%s.\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* - * Send flow on if flow off has been previously signalled - * and number of packets is below low water mark. - */ - spin_lock_bh(&cfhsi->lock); - if (cfhsi->flow_off_sent && - cfhsi_tx_queue_len(cfhsi) <= cfhsi->cfg.q_low_mark && - cfhsi->cfdev.flowctrl) { - - cfhsi->flow_off_sent = 0; - cfhsi->cfdev.flowctrl(cfhsi->ndev, ON); - } - - if (cfhsi_can_send_aggregate(cfhsi)) { - spin_unlock_bh(&cfhsi->lock); - cfhsi_start_tx(cfhsi); - } else { - mod_timer(&cfhsi->aggregation_timer, - jiffies + cfhsi->cfg.aggregation_timeout); - spin_unlock_bh(&cfhsi->lock); - } - - return; -} - -static void cfhsi_tx_done_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - cfhsi_tx_done(cfhsi); -} - -static int cfhsi_rx_desc(struct cfhsi_desc *desc, struct cfhsi *cfhsi) -{ - int xfer_sz = 0; - int nfrms = 0; - u16 *plen = NULL; - u8 *pfrm = NULL; - - if ((desc->header & ~CFHSI_PIGGY_DESC) || - (desc->offset > CFHSI_MAX_EMB_FRM_SZ)) { - netdev_err(cfhsi->ndev, "%s: Invalid descriptor.\n", - __func__); - return -EPROTO; - } - - /* Check for embedded CAIF frame. */ - if (desc->offset) { - struct sk_buff *skb; - int len = 0; - pfrm = ((u8 *)desc) + desc->offset; - - /* Remove offset padding. */ - pfrm += *pfrm + 1; - - /* Read length of CAIF frame (little endian). */ - len = *pfrm; - len |= ((*(pfrm+1)) << 8) & 0xFF00; - len += 2; /* Add FCS fields. */ - - /* Sanity check length of CAIF frame. */ - if (unlikely(len > CFHSI_MAX_CAIF_FRAME_SZ)) { - netdev_err(cfhsi->ndev, "%s: Invalid length.\n", - __func__); - return -EPROTO; - } - - /* Allocate SKB (OK even in IRQ context). */ - skb = alloc_skb(len + 1, GFP_ATOMIC); - if (!skb) { - netdev_err(cfhsi->ndev, "%s: Out of memory !\n", - __func__); - return -ENOMEM; - } - caif_assert(skb != NULL); - - skb_put_data(skb, pfrm, len); - - skb->protocol = htons(ETH_P_CAIF); - skb_reset_mac_header(skb); - skb->dev = cfhsi->ndev; - - netif_rx_any_context(skb); - - /* Update network statistics. */ - cfhsi->ndev->stats.rx_packets++; - cfhsi->ndev->stats.rx_bytes += len; - } - - /* Calculate transfer length. */ - plen = desc->cffrm_len; - while (nfrms < CFHSI_MAX_PKTS && *plen) { - xfer_sz += *plen; - plen++; - nfrms++; - } - - /* Check for piggy-backed descriptor. */ - if (desc->header & CFHSI_PIGGY_DESC) - xfer_sz += CFHSI_DESC_SZ; - - if ((xfer_sz % 4) || (xfer_sz > (CFHSI_BUF_SZ_RX - CFHSI_DESC_SZ))) { - netdev_err(cfhsi->ndev, - "%s: Invalid payload len: %d, ignored.\n", - __func__, xfer_sz); - return -EPROTO; - } - return xfer_sz; -} - -static int cfhsi_rx_desc_len(struct cfhsi_desc *desc) -{ - int xfer_sz = 0; - int nfrms = 0; - u16 *plen; - - if ((desc->header & ~CFHSI_PIGGY_DESC) || - (desc->offset > CFHSI_MAX_EMB_FRM_SZ)) { - - pr_err("Invalid descriptor. %x %x\n", desc->header, - desc->offset); - return -EPROTO; - } - - /* Calculate transfer length. */ - plen = desc->cffrm_len; - while (nfrms < CFHSI_MAX_PKTS && *plen) { - xfer_sz += *plen; - plen++; - nfrms++; - } - - if (xfer_sz % 4) { - pr_err("Invalid payload len: %d, ignored.\n", xfer_sz); - return -EPROTO; - } - return xfer_sz; -} - -static int cfhsi_rx_pld(struct cfhsi_desc *desc, struct cfhsi *cfhsi) -{ - int rx_sz = 0; - int nfrms = 0; - u16 *plen = NULL; - u8 *pfrm = NULL; - - /* Sanity check header and offset. */ - if (WARN_ON((desc->header & ~CFHSI_PIGGY_DESC) || - (desc->offset > CFHSI_MAX_EMB_FRM_SZ))) { - netdev_err(cfhsi->ndev, "%s: Invalid descriptor.\n", - __func__); - return -EPROTO; - } - - /* Set frame pointer to start of payload. */ - pfrm = desc->emb_frm + CFHSI_MAX_EMB_FRM_SZ; - plen = desc->cffrm_len; - - /* Skip already processed frames. */ - while (nfrms < cfhsi->rx_state.nfrms) { - pfrm += *plen; - rx_sz += *plen; - plen++; - nfrms++; - } - - /* Parse payload. */ - while (nfrms < CFHSI_MAX_PKTS && *plen) { - struct sk_buff *skb; - u8 *pcffrm = NULL; - int len; - - /* CAIF frame starts after head padding. */ - pcffrm = pfrm + *pfrm + 1; - - /* Read length of CAIF frame (little endian). */ - len = *pcffrm; - len |= ((*(pcffrm + 1)) << 8) & 0xFF00; - len += 2; /* Add FCS fields. */ - - /* Sanity check length of CAIF frames. */ - if (unlikely(len > CFHSI_MAX_CAIF_FRAME_SZ)) { - netdev_err(cfhsi->ndev, "%s: Invalid length.\n", - __func__); - return -EPROTO; - } - - /* Allocate SKB (OK even in IRQ context). */ - skb = alloc_skb(len + 1, GFP_ATOMIC); - if (!skb) { - netdev_err(cfhsi->ndev, "%s: Out of memory !\n", - __func__); - cfhsi->rx_state.nfrms = nfrms; - return -ENOMEM; - } - caif_assert(skb != NULL); - - skb_put_data(skb, pcffrm, len); - - skb->protocol = htons(ETH_P_CAIF); - skb_reset_mac_header(skb); - skb->dev = cfhsi->ndev; - - netif_rx_any_context(skb); - - /* Update network statistics. */ - cfhsi->ndev->stats.rx_packets++; - cfhsi->ndev->stats.rx_bytes += len; - - pfrm += *plen; - rx_sz += *plen; - plen++; - nfrms++; - } - - return rx_sz; -} - -static void cfhsi_rx_done(struct cfhsi *cfhsi) -{ - int res; - int desc_pld_len = 0, rx_len, rx_state; - struct cfhsi_desc *desc = NULL; - u8 *rx_ptr, *rx_buf; - struct cfhsi_desc *piggy_desc = NULL; - - desc = (struct cfhsi_desc *)cfhsi->rx_buf; - - netdev_dbg(cfhsi->ndev, "%s\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* Update inactivity timer if pending. */ - spin_lock_bh(&cfhsi->lock); - mod_timer_pending(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); - - if (cfhsi->rx_state.state == CFHSI_RX_STATE_DESC) { - desc_pld_len = cfhsi_rx_desc_len(desc); - - if (desc_pld_len < 0) - goto out_of_sync; - - rx_buf = cfhsi->rx_buf; - rx_len = desc_pld_len; - if (desc_pld_len > 0 && (desc->header & CFHSI_PIGGY_DESC)) - rx_len += CFHSI_DESC_SZ; - if (desc_pld_len == 0) - rx_buf = cfhsi->rx_flip_buf; - } else { - rx_buf = cfhsi->rx_flip_buf; - - rx_len = CFHSI_DESC_SZ; - if (cfhsi->rx_state.pld_len > 0 && - (desc->header & CFHSI_PIGGY_DESC)) { - - piggy_desc = (struct cfhsi_desc *) - (desc->emb_frm + CFHSI_MAX_EMB_FRM_SZ + - cfhsi->rx_state.pld_len); - - cfhsi->rx_state.piggy_desc = true; - - /* Extract payload len from piggy-backed descriptor. */ - desc_pld_len = cfhsi_rx_desc_len(piggy_desc); - if (desc_pld_len < 0) - goto out_of_sync; - - if (desc_pld_len > 0) { - rx_len = desc_pld_len; - if (piggy_desc->header & CFHSI_PIGGY_DESC) - rx_len += CFHSI_DESC_SZ; - } - - /* - * Copy needed information from the piggy-backed - * descriptor to the descriptor in the start. - */ - memcpy(rx_buf, (u8 *)piggy_desc, - CFHSI_DESC_SHORT_SZ); - } - } - - if (desc_pld_len) { - rx_state = CFHSI_RX_STATE_PAYLOAD; - rx_ptr = rx_buf + CFHSI_DESC_SZ; - } else { - rx_state = CFHSI_RX_STATE_DESC; - rx_ptr = rx_buf; - rx_len = CFHSI_DESC_SZ; - } - - /* Initiate next read */ - if (test_bit(CFHSI_AWAKE, &cfhsi->bits)) { - /* Set up new transfer. */ - netdev_dbg(cfhsi->ndev, "%s: Start RX.\n", - __func__); - - res = cfhsi->ops->cfhsi_rx(rx_ptr, rx_len, - cfhsi->ops); - if (WARN_ON(res < 0)) { - netdev_err(cfhsi->ndev, "%s: RX error %d.\n", - __func__, res); - cfhsi->ndev->stats.rx_errors++; - cfhsi->ndev->stats.rx_dropped++; - } - } - - if (cfhsi->rx_state.state == CFHSI_RX_STATE_DESC) { - /* Extract payload from descriptor */ - if (cfhsi_rx_desc(desc, cfhsi) < 0) - goto out_of_sync; - } else { - /* Extract payload */ - if (cfhsi_rx_pld(desc, cfhsi) < 0) - goto out_of_sync; - if (piggy_desc) { - /* Extract any payload in piggyback descriptor. */ - if (cfhsi_rx_desc(piggy_desc, cfhsi) < 0) - goto out_of_sync; - /* Mark no embedded frame after extracting it */ - piggy_desc->offset = 0; - } - } - - /* Update state info */ - memset(&cfhsi->rx_state, 0, sizeof(cfhsi->rx_state)); - cfhsi->rx_state.state = rx_state; - cfhsi->rx_ptr = rx_ptr; - cfhsi->rx_len = rx_len; - cfhsi->rx_state.pld_len = desc_pld_len; - cfhsi->rx_state.piggy_desc = desc->header & CFHSI_PIGGY_DESC; - - if (rx_buf != cfhsi->rx_buf) - swap(cfhsi->rx_buf, cfhsi->rx_flip_buf); - return; - -out_of_sync: - netdev_err(cfhsi->ndev, "%s: Out of sync.\n", __func__); - print_hex_dump_bytes("--> ", DUMP_PREFIX_NONE, - cfhsi->rx_buf, CFHSI_DESC_SZ); - schedule_work(&cfhsi->out_of_sync_work); -} - -static void cfhsi_rx_slowpath(struct timer_list *t) -{ - struct cfhsi *cfhsi = from_timer(cfhsi, t, rx_slowpath_timer); - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - cfhsi_rx_done(cfhsi); -} - -static void cfhsi_rx_done_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - if (test_and_clear_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits)) - wake_up_interruptible(&cfhsi->flush_fifo_wait); - else - cfhsi_rx_done(cfhsi); -} - -static void cfhsi_wake_up(struct work_struct *work) -{ - struct cfhsi *cfhsi = NULL; - int res; - int len; - long ret; - - cfhsi = container_of(work, struct cfhsi, wake_up_work); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - if (unlikely(test_bit(CFHSI_AWAKE, &cfhsi->bits))) { - /* It happenes when wakeup is requested by - * both ends at the same time. */ - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - return; - } - - /* Activate wake line. */ - cfhsi->ops->cfhsi_wake_up(cfhsi->ops); - - netdev_dbg(cfhsi->ndev, "%s: Start waiting.\n", - __func__); - - /* Wait for acknowledge. */ - ret = CFHSI_WAKE_TOUT; - ret = wait_event_interruptible_timeout(cfhsi->wake_up_wait, - test_and_clear_bit(CFHSI_WAKE_UP_ACK, - &cfhsi->bits), ret); - if (unlikely(ret < 0)) { - /* Interrupted by signal. */ - netdev_err(cfhsi->ndev, "%s: Signalled: %ld.\n", - __func__, ret); - - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - cfhsi->ops->cfhsi_wake_down(cfhsi->ops); - return; - } else if (!ret) { - bool ca_wake = false; - size_t fifo_occupancy = 0; - - /* Wakeup timeout */ - netdev_dbg(cfhsi->ndev, "%s: Timeout.\n", - __func__); - - /* Check FIFO to check if modem has sent something. */ - WARN_ON(cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, - &fifo_occupancy)); - - netdev_dbg(cfhsi->ndev, "%s: Bytes in FIFO: %u.\n", - __func__, (unsigned) fifo_occupancy); - - /* Check if we misssed the interrupt. */ - WARN_ON(cfhsi->ops->cfhsi_get_peer_wake(cfhsi->ops, - &ca_wake)); - - if (ca_wake) { - netdev_err(cfhsi->ndev, "%s: CA Wake missed !.\n", - __func__); - - /* Clear the CFHSI_WAKE_UP_ACK bit to prevent race. */ - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - - /* Continue execution. */ - goto wake_ack; - } - - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - cfhsi->ops->cfhsi_wake_down(cfhsi->ops); - return; - } -wake_ack: - netdev_dbg(cfhsi->ndev, "%s: Woken.\n", - __func__); - - /* Clear power up bit. */ - set_bit(CFHSI_AWAKE, &cfhsi->bits); - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - - /* Resume read operation. */ - netdev_dbg(cfhsi->ndev, "%s: Start RX.\n", __func__); - res = cfhsi->ops->cfhsi_rx(cfhsi->rx_ptr, cfhsi->rx_len, cfhsi->ops); - - if (WARN_ON(res < 0)) - netdev_err(cfhsi->ndev, "%s: RX err %d.\n", __func__, res); - - /* Clear power up acknowledment. */ - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - - spin_lock_bh(&cfhsi->lock); - - /* Resume transmit if queues are not empty. */ - if (!cfhsi_tx_queue_len(cfhsi)) { - netdev_dbg(cfhsi->ndev, "%s: Peer wake, start timer.\n", - __func__); - /* Start inactivity timer. */ - mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->cfg.inactivity_timeout); - spin_unlock_bh(&cfhsi->lock); - return; - } - - netdev_dbg(cfhsi->ndev, "%s: Host wake.\n", - __func__); - - spin_unlock_bh(&cfhsi->lock); - - /* Create HSI frame. */ - len = cfhsi_tx_frm((struct cfhsi_desc *)cfhsi->tx_buf, cfhsi); - - if (likely(len > 0)) { - /* Set up new transfer. */ - res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); - if (WARN_ON(res < 0)) { - netdev_err(cfhsi->ndev, "%s: TX error %d.\n", - __func__, res); - cfhsi_abort_tx(cfhsi); - } - } else { - netdev_err(cfhsi->ndev, - "%s: Failed to create HSI frame: %d.\n", - __func__, len); - } -} - -static void cfhsi_wake_down(struct work_struct *work) -{ - long ret; - struct cfhsi *cfhsi = NULL; - size_t fifo_occupancy = 0; - int retry = CFHSI_WAKE_TOUT; - - cfhsi = container_of(work, struct cfhsi, wake_down_work); - netdev_dbg(cfhsi->ndev, "%s.\n", __func__); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* Deactivate wake line. */ - cfhsi->ops->cfhsi_wake_down(cfhsi->ops); - - /* Wait for acknowledge. */ - ret = CFHSI_WAKE_TOUT; - ret = wait_event_interruptible_timeout(cfhsi->wake_down_wait, - test_and_clear_bit(CFHSI_WAKE_DOWN_ACK, - &cfhsi->bits), ret); - if (ret < 0) { - /* Interrupted by signal. */ - netdev_err(cfhsi->ndev, "%s: Signalled: %ld.\n", - __func__, ret); - return; - } else if (!ret) { - bool ca_wake = true; - - /* Timeout */ - netdev_err(cfhsi->ndev, "%s: Timeout.\n", __func__); - - /* Check if we misssed the interrupt. */ - WARN_ON(cfhsi->ops->cfhsi_get_peer_wake(cfhsi->ops, - &ca_wake)); - if (!ca_wake) - netdev_err(cfhsi->ndev, "%s: CA Wake missed !.\n", - __func__); - } - - /* Check FIFO occupancy. */ - while (retry) { - WARN_ON(cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, - &fifo_occupancy)); - - if (!fifo_occupancy) - break; - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - retry--; - } - - if (!retry) - netdev_err(cfhsi->ndev, "%s: FIFO Timeout.\n", __func__); - - /* Clear AWAKE condition. */ - clear_bit(CFHSI_AWAKE, &cfhsi->bits); - - /* Cancel pending RX requests. */ - cfhsi->ops->cfhsi_rx_cancel(cfhsi->ops); -} - -static void cfhsi_out_of_sync(struct work_struct *work) -{ - struct cfhsi *cfhsi = NULL; - - cfhsi = container_of(work, struct cfhsi, out_of_sync_work); - - rtnl_lock(); - dev_close(cfhsi->ndev); - rtnl_unlock(); -} - -static void cfhsi_wake_up_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi = NULL; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - set_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - wake_up_interruptible(&cfhsi->wake_up_wait); - - if (test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) - return; - - /* Schedule wake up work queue if the peer initiates. */ - if (!test_and_set_bit(CFHSI_WAKE_UP, &cfhsi->bits)) - queue_work(cfhsi->wq, &cfhsi->wake_up_work); -} - -static void cfhsi_wake_down_cb(struct cfhsi_cb_ops *cb_ops) -{ - struct cfhsi *cfhsi = NULL; - - cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - /* Initiating low power is only permitted by the host (us). */ - set_bit(CFHSI_WAKE_DOWN_ACK, &cfhsi->bits); - wake_up_interruptible(&cfhsi->wake_down_wait); -} - -static void cfhsi_aggregation_tout(struct timer_list *t) -{ - struct cfhsi *cfhsi = from_timer(cfhsi, t, aggregation_timer); - - netdev_dbg(cfhsi->ndev, "%s.\n", - __func__); - - cfhsi_start_tx(cfhsi); -} - -static netdev_tx_t cfhsi_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct cfhsi *cfhsi = NULL; - int start_xfer = 0; - int timer_active; - int prio; - - if (!dev) - return -EINVAL; - - cfhsi = netdev_priv(dev); - - switch (skb->priority) { - case TC_PRIO_BESTEFFORT: - case TC_PRIO_FILLER: - case TC_PRIO_BULK: - prio = CFHSI_PRIO_BEBK; - break; - case TC_PRIO_INTERACTIVE_BULK: - prio = CFHSI_PRIO_VI; - break; - case TC_PRIO_INTERACTIVE: - prio = CFHSI_PRIO_VO; - break; - case TC_PRIO_CONTROL: - default: - prio = CFHSI_PRIO_CTL; - break; - } - - spin_lock_bh(&cfhsi->lock); - - /* Update aggregation statistics */ - cfhsi_update_aggregation_stats(cfhsi, skb, 1); - - /* Queue the SKB */ - skb_queue_tail(&cfhsi->qhead[prio], skb); - - /* Sanity check; xmit should not be called after unregister_netdev */ - if (WARN_ON(test_bit(CFHSI_SHUTDOWN, &cfhsi->bits))) { - spin_unlock_bh(&cfhsi->lock); - cfhsi_abort_tx(cfhsi); - return -EINVAL; - } - - /* Send flow off if number of packets is above high water mark. */ - if (!cfhsi->flow_off_sent && - cfhsi_tx_queue_len(cfhsi) > cfhsi->cfg.q_high_mark && - cfhsi->cfdev.flowctrl) { - cfhsi->flow_off_sent = 1; - cfhsi->cfdev.flowctrl(cfhsi->ndev, OFF); - } - - if (cfhsi->tx_state == CFHSI_TX_STATE_IDLE) { - cfhsi->tx_state = CFHSI_TX_STATE_XFER; - start_xfer = 1; - } - - if (!start_xfer) { - /* Send aggregate if it is possible */ - bool aggregate_ready = - cfhsi_can_send_aggregate(cfhsi) && - del_timer(&cfhsi->aggregation_timer) > 0; - spin_unlock_bh(&cfhsi->lock); - if (aggregate_ready) - cfhsi_start_tx(cfhsi); - return NETDEV_TX_OK; - } - - /* Delete inactivity timer if started. */ - timer_active = del_timer_sync(&cfhsi->inactivity_timer); - - spin_unlock_bh(&cfhsi->lock); - - if (timer_active) { - struct cfhsi_desc *desc = (struct cfhsi_desc *)cfhsi->tx_buf; - int len; - int res; - - /* Create HSI frame. */ - len = cfhsi_tx_frm(desc, cfhsi); - WARN_ON(!len); - - /* Set up new transfer. */ - res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); - if (WARN_ON(res < 0)) { - netdev_err(cfhsi->ndev, "%s: TX error %d.\n", - __func__, res); - cfhsi_abort_tx(cfhsi); - } - } else { - /* Schedule wake up work queue if the we initiate. */ - if (!test_and_set_bit(CFHSI_WAKE_UP, &cfhsi->bits)) - queue_work(cfhsi->wq, &cfhsi->wake_up_work); - } - - return NETDEV_TX_OK; -} - -static const struct net_device_ops cfhsi_netdevops; - -static void cfhsi_setup(struct net_device *dev) -{ - int i; - struct cfhsi *cfhsi = netdev_priv(dev); - dev->features = 0; - dev->type = ARPHRD_CAIF; - dev->flags = IFF_POINTOPOINT | IFF_NOARP; - dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ; - dev->priv_flags |= IFF_NO_QUEUE; - dev->needs_free_netdev = true; - dev->netdev_ops = &cfhsi_netdevops; - for (i = 0; i < CFHSI_PRIO_LAST; ++i) - skb_queue_head_init(&cfhsi->qhead[i]); - cfhsi->cfdev.link_select = CAIF_LINK_HIGH_BANDW; - cfhsi->cfdev.use_frag = false; - cfhsi->cfdev.use_stx = false; - cfhsi->cfdev.use_fcs = false; - cfhsi->ndev = dev; - cfhsi->cfg = hsi_default_config; -} - -static int cfhsi_open(struct net_device *ndev) -{ - struct cfhsi *cfhsi = netdev_priv(ndev); - int res; - - clear_bit(CFHSI_SHUTDOWN, &cfhsi->bits); - - /* Initialize state vaiables. */ - cfhsi->tx_state = CFHSI_TX_STATE_IDLE; - cfhsi->rx_state.state = CFHSI_RX_STATE_DESC; - - /* Set flow info */ - cfhsi->flow_off_sent = 0; - - /* - * Allocate a TX buffer with the size of a HSI packet descriptors - * and the necessary room for CAIF payload frames. - */ - cfhsi->tx_buf = kzalloc(CFHSI_BUF_SZ_TX, GFP_KERNEL); - if (!cfhsi->tx_buf) { - res = -ENODEV; - goto err_alloc_tx; - } - - /* - * Allocate a RX buffer with the size of two HSI packet descriptors and - * the necessary room for CAIF payload frames. - */ - cfhsi->rx_buf = kzalloc(CFHSI_BUF_SZ_RX, GFP_KERNEL); - if (!cfhsi->rx_buf) { - res = -ENODEV; - goto err_alloc_rx; - } - - cfhsi->rx_flip_buf = kzalloc(CFHSI_BUF_SZ_RX, GFP_KERNEL); - if (!cfhsi->rx_flip_buf) { - res = -ENODEV; - goto err_alloc_rx_flip; - } - - /* Initialize aggregation timeout */ - cfhsi->cfg.aggregation_timeout = hsi_default_config.aggregation_timeout; - - /* Initialize recieve vaiables. */ - cfhsi->rx_ptr = cfhsi->rx_buf; - cfhsi->rx_len = CFHSI_DESC_SZ; - - /* Initialize spin locks. */ - spin_lock_init(&cfhsi->lock); - - /* Set up the driver. */ - cfhsi->cb_ops.tx_done_cb = cfhsi_tx_done_cb; - cfhsi->cb_ops.rx_done_cb = cfhsi_rx_done_cb; - cfhsi->cb_ops.wake_up_cb = cfhsi_wake_up_cb; - cfhsi->cb_ops.wake_down_cb = cfhsi_wake_down_cb; - - /* Initialize the work queues. */ - INIT_WORK(&cfhsi->wake_up_work, cfhsi_wake_up); - INIT_WORK(&cfhsi->wake_down_work, cfhsi_wake_down); - INIT_WORK(&cfhsi->out_of_sync_work, cfhsi_out_of_sync); - - /* Clear all bit fields. */ - clear_bit(CFHSI_WAKE_UP_ACK, &cfhsi->bits); - clear_bit(CFHSI_WAKE_DOWN_ACK, &cfhsi->bits); - clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - clear_bit(CFHSI_AWAKE, &cfhsi->bits); - - /* Create work thread. */ - cfhsi->wq = alloc_ordered_workqueue(cfhsi->ndev->name, WQ_MEM_RECLAIM); - if (!cfhsi->wq) { - netdev_err(cfhsi->ndev, "%s: Failed to create work queue.\n", - __func__); - res = -ENODEV; - goto err_create_wq; - } - - /* Initialize wait queues. */ - init_waitqueue_head(&cfhsi->wake_up_wait); - init_waitqueue_head(&cfhsi->wake_down_wait); - init_waitqueue_head(&cfhsi->flush_fifo_wait); - - /* Setup the inactivity timer. */ - timer_setup(&cfhsi->inactivity_timer, cfhsi_inactivity_tout, 0); - /* Setup the slowpath RX timer. */ - timer_setup(&cfhsi->rx_slowpath_timer, cfhsi_rx_slowpath, 0); - /* Setup the aggregation timer. */ - timer_setup(&cfhsi->aggregation_timer, cfhsi_aggregation_tout, 0); - - /* Activate HSI interface. */ - res = cfhsi->ops->cfhsi_up(cfhsi->ops); - if (res) { - netdev_err(cfhsi->ndev, - "%s: can't activate HSI interface: %d.\n", - __func__, res); - goto err_activate; - } - - /* Flush FIFO */ - res = cfhsi_flush_fifo(cfhsi); - if (res) { - netdev_err(cfhsi->ndev, "%s: Can't flush FIFO: %d.\n", - __func__, res); - goto err_net_reg; - } - return res; - - err_net_reg: - cfhsi->ops->cfhsi_down(cfhsi->ops); - err_activate: - destroy_workqueue(cfhsi->wq); - err_create_wq: - kfree(cfhsi->rx_flip_buf); - err_alloc_rx_flip: - kfree(cfhsi->rx_buf); - err_alloc_rx: - kfree(cfhsi->tx_buf); - err_alloc_tx: - return res; -} - -static int cfhsi_close(struct net_device *ndev) -{ - struct cfhsi *cfhsi = netdev_priv(ndev); - u8 *tx_buf, *rx_buf, *flip_buf; - - /* going to shutdown driver */ - set_bit(CFHSI_SHUTDOWN, &cfhsi->bits); - - /* Delete timers if pending */ - del_timer_sync(&cfhsi->inactivity_timer); - del_timer_sync(&cfhsi->rx_slowpath_timer); - del_timer_sync(&cfhsi->aggregation_timer); - - /* Cancel pending RX request (if any) */ - cfhsi->ops->cfhsi_rx_cancel(cfhsi->ops); - - /* Destroy workqueue */ - destroy_workqueue(cfhsi->wq); - - /* Store bufferes: will be freed later. */ - tx_buf = cfhsi->tx_buf; - rx_buf = cfhsi->rx_buf; - flip_buf = cfhsi->rx_flip_buf; - /* Flush transmit queues. */ - cfhsi_abort_tx(cfhsi); - - /* Deactivate interface */ - cfhsi->ops->cfhsi_down(cfhsi->ops); - - /* Free buffers. */ - kfree(tx_buf); - kfree(rx_buf); - kfree(flip_buf); - return 0; -} - -static void cfhsi_uninit(struct net_device *dev) -{ - struct cfhsi *cfhsi = netdev_priv(dev); - ASSERT_RTNL(); - symbol_put(cfhsi_get_device); - list_del(&cfhsi->list); -} - -static const struct net_device_ops cfhsi_netdevops = { - .ndo_uninit = cfhsi_uninit, - .ndo_open = cfhsi_open, - .ndo_stop = cfhsi_close, - .ndo_start_xmit = cfhsi_xmit -}; - -static void cfhsi_netlink_parms(struct nlattr *data[], struct cfhsi *cfhsi) -{ - int i; - - if (!data) { - pr_debug("no params data found\n"); - return; - } - - i = __IFLA_CAIF_HSI_INACTIVITY_TOUT; - /* - * Inactivity timeout in millisecs. Lowest possible value is 1, - * and highest possible is NEXT_TIMER_MAX_DELTA. - */ - if (data[i]) { - u32 inactivity_timeout = nla_get_u32(data[i]); - /* Pre-calculate inactivity timeout. */ - cfhsi->cfg.inactivity_timeout = inactivity_timeout * HZ / 1000; - if (cfhsi->cfg.inactivity_timeout == 0) - cfhsi->cfg.inactivity_timeout = 1; - else if (cfhsi->cfg.inactivity_timeout > NEXT_TIMER_MAX_DELTA) - cfhsi->cfg.inactivity_timeout = NEXT_TIMER_MAX_DELTA; - } - - i = __IFLA_CAIF_HSI_AGGREGATION_TOUT; - if (data[i]) - cfhsi->cfg.aggregation_timeout = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_HEAD_ALIGN; - if (data[i]) - cfhsi->cfg.head_align = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_TAIL_ALIGN; - if (data[i]) - cfhsi->cfg.tail_align = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_QHIGH_WATERMARK; - if (data[i]) - cfhsi->cfg.q_high_mark = nla_get_u32(data[i]); - - i = __IFLA_CAIF_HSI_QLOW_WATERMARK; - if (data[i]) - cfhsi->cfg.q_low_mark = nla_get_u32(data[i]); -} - -static int caif_hsi_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[], - struct netlink_ext_ack *extack) -{ - cfhsi_netlink_parms(data, netdev_priv(dev)); - netdev_state_change(dev); - return 0; -} - -static const struct nla_policy caif_hsi_policy[__IFLA_CAIF_HSI_MAX + 1] = { - [__IFLA_CAIF_HSI_INACTIVITY_TOUT] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_AGGREGATION_TOUT] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_HEAD_ALIGN] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_TAIL_ALIGN] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_QHIGH_WATERMARK] = { .type = NLA_U32, .len = 4 }, - [__IFLA_CAIF_HSI_QLOW_WATERMARK] = { .type = NLA_U32, .len = 4 }, -}; - -static size_t caif_hsi_get_size(const struct net_device *dev) -{ - int i; - size_t s = 0; - for (i = __IFLA_CAIF_HSI_UNSPEC + 1; i < __IFLA_CAIF_HSI_MAX; i++) - s += nla_total_size(caif_hsi_policy[i].len); - return s; -} - -static int caif_hsi_fill_info(struct sk_buff *skb, const struct net_device *dev) -{ - struct cfhsi *cfhsi = netdev_priv(dev); - - if (nla_put_u32(skb, __IFLA_CAIF_HSI_INACTIVITY_TOUT, - cfhsi->cfg.inactivity_timeout) || - nla_put_u32(skb, __IFLA_CAIF_HSI_AGGREGATION_TOUT, - cfhsi->cfg.aggregation_timeout) || - nla_put_u32(skb, __IFLA_CAIF_HSI_HEAD_ALIGN, - cfhsi->cfg.head_align) || - nla_put_u32(skb, __IFLA_CAIF_HSI_TAIL_ALIGN, - cfhsi->cfg.tail_align) || - nla_put_u32(skb, __IFLA_CAIF_HSI_QHIGH_WATERMARK, - cfhsi->cfg.q_high_mark) || - nla_put_u32(skb, __IFLA_CAIF_HSI_QLOW_WATERMARK, - cfhsi->cfg.q_low_mark)) - return -EMSGSIZE; - - return 0; -} - -static int caif_hsi_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) -{ - struct cfhsi *cfhsi = NULL; - struct cfhsi_ops *(*get_ops)(void); - - ASSERT_RTNL(); - - cfhsi = netdev_priv(dev); - cfhsi_netlink_parms(data, cfhsi); - - get_ops = symbol_get(cfhsi_get_ops); - if (!get_ops) { - pr_err("%s: failed to get the cfhsi_ops\n", __func__); - return -ENODEV; - } - - /* Assign the HSI device. */ - cfhsi->ops = (*get_ops)(); - if (!cfhsi->ops) { - pr_err("%s: failed to get the cfhsi_ops\n", __func__); - goto err; - } - - /* Assign the driver to this HSI device. */ - cfhsi->ops->cb_ops = &cfhsi->cb_ops; - if (register_netdevice(dev)) { - pr_warn("%s: caif_hsi device registration failed\n", __func__); - goto err; - } - /* Add CAIF HSI device to list. */ - list_add_tail(&cfhsi->list, &cfhsi_list); - - return 0; -err: - symbol_put(cfhsi_get_ops); - return -ENODEV; -} - -static struct rtnl_link_ops caif_hsi_link_ops __read_mostly = { - .kind = "cfhsi", - .priv_size = sizeof(struct cfhsi), - .setup = cfhsi_setup, - .maxtype = __IFLA_CAIF_HSI_MAX, - .policy = caif_hsi_policy, - .newlink = caif_hsi_newlink, - .changelink = caif_hsi_changelink, - .get_size = caif_hsi_get_size, - .fill_info = caif_hsi_fill_info, -}; - -static void __exit cfhsi_exit_module(void) -{ - struct list_head *list_node; - struct list_head *n; - struct cfhsi *cfhsi; - - rtnl_link_unregister(&caif_hsi_link_ops); - - rtnl_lock(); - list_for_each_safe(list_node, n, &cfhsi_list) { - cfhsi = list_entry(list_node, struct cfhsi, list); - unregister_netdevice(cfhsi->ndev); - } - rtnl_unlock(); -} - -static int __init cfhsi_init_module(void) -{ - return rtnl_link_register(&caif_hsi_link_ops); -} - -module_init(cfhsi_init_module); -module_exit(cfhsi_exit_module); diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h deleted file mode 100644 index 552cf68d28d2..000000000000 --- a/include/net/caif/caif_hsi.h +++ /dev/null @@ -1,200 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Author: Daniel Martensson / daniel.martensson@stericsson.com - * Dmitry.Tarnyagin / dmitry.tarnyagin@stericsson.com - */ - -#ifndef CAIF_HSI_H_ -#define CAIF_HSI_H_ - -#include -#include -#include - -/* - * Maximum number of CAIF frames that can reside in the same HSI frame. - */ -#define CFHSI_MAX_PKTS 15 - -/* - * Maximum number of bytes used for the frame that can be embedded in the - * HSI descriptor. - */ -#define CFHSI_MAX_EMB_FRM_SZ 96 - -/* - * Decides if HSI buffers should be prefilled with 0xFF pattern for easier - * debugging. Both TX and RX buffers will be filled before the transfer. - */ -#define CFHSI_DBG_PREFILL 0 - -/* Structure describing a HSI packet descriptor. */ -#pragma pack(1) /* Byte alignment. */ -struct cfhsi_desc { - u8 header; - u8 offset; - u16 cffrm_len[CFHSI_MAX_PKTS]; - u8 emb_frm[CFHSI_MAX_EMB_FRM_SZ]; -}; -#pragma pack() /* Default alignment. */ - -/* Size of the complete HSI packet descriptor. */ -#define CFHSI_DESC_SZ (sizeof(struct cfhsi_desc)) - -/* - * Size of the complete HSI packet descriptor excluding the optional embedded - * CAIF frame. - */ -#define CFHSI_DESC_SHORT_SZ (CFHSI_DESC_SZ - CFHSI_MAX_EMB_FRM_SZ) - -/* - * Maximum bytes transferred in one transfer. - */ -#define CFHSI_MAX_CAIF_FRAME_SZ 4096 - -#define CFHSI_MAX_PAYLOAD_SZ (CFHSI_MAX_PKTS * CFHSI_MAX_CAIF_FRAME_SZ) - -/* Size of the complete HSI TX buffer. */ -#define CFHSI_BUF_SZ_TX (CFHSI_DESC_SZ + CFHSI_MAX_PAYLOAD_SZ) - -/* Size of the complete HSI RX buffer. */ -#define CFHSI_BUF_SZ_RX ((2 * CFHSI_DESC_SZ) + CFHSI_MAX_PAYLOAD_SZ) - -/* Bitmasks for the HSI descriptor. */ -#define CFHSI_PIGGY_DESC (0x01 << 7) - -#define CFHSI_TX_STATE_IDLE 0 -#define CFHSI_TX_STATE_XFER 1 - -#define CFHSI_RX_STATE_DESC 0 -#define CFHSI_RX_STATE_PAYLOAD 1 - -/* Bitmasks for power management. */ -#define CFHSI_WAKE_UP 0 -#define CFHSI_WAKE_UP_ACK 1 -#define CFHSI_WAKE_DOWN_ACK 2 -#define CFHSI_AWAKE 3 -#define CFHSI_WAKELOCK_HELD 4 -#define CFHSI_SHUTDOWN 5 -#define CFHSI_FLUSH_FIFO 6 - -#ifndef CFHSI_INACTIVITY_TOUT -#define CFHSI_INACTIVITY_TOUT (1 * HZ) -#endif /* CFHSI_INACTIVITY_TOUT */ - -#ifndef CFHSI_WAKE_TOUT -#define CFHSI_WAKE_TOUT (3 * HZ) -#endif /* CFHSI_WAKE_TOUT */ - -#ifndef CFHSI_MAX_RX_RETRIES -#define CFHSI_MAX_RX_RETRIES (10 * HZ) -#endif - -/* Structure implemented by the CAIF HSI driver. */ -struct cfhsi_cb_ops { - void (*tx_done_cb) (struct cfhsi_cb_ops *drv); - void (*rx_done_cb) (struct cfhsi_cb_ops *drv); - void (*wake_up_cb) (struct cfhsi_cb_ops *drv); - void (*wake_down_cb) (struct cfhsi_cb_ops *drv); -}; - -/* Structure implemented by HSI device. */ -struct cfhsi_ops { - int (*cfhsi_up) (struct cfhsi_ops *dev); - int (*cfhsi_down) (struct cfhsi_ops *dev); - int (*cfhsi_tx) (u8 *ptr, int len, struct cfhsi_ops *dev); - int (*cfhsi_rx) (u8 *ptr, int len, struct cfhsi_ops *dev); - int (*cfhsi_wake_up) (struct cfhsi_ops *dev); - int (*cfhsi_wake_down) (struct cfhsi_ops *dev); - int (*cfhsi_get_peer_wake) (struct cfhsi_ops *dev, bool *status); - int (*cfhsi_fifo_occupancy) (struct cfhsi_ops *dev, size_t *occupancy); - int (*cfhsi_rx_cancel)(struct cfhsi_ops *dev); - struct cfhsi_cb_ops *cb_ops; -}; - -/* Structure holds status of received CAIF frames processing */ -struct cfhsi_rx_state { - int state; - int nfrms; - int pld_len; - int retries; - bool piggy_desc; -}; - -/* Priority mapping */ -enum { - CFHSI_PRIO_CTL = 0, - CFHSI_PRIO_VI, - CFHSI_PRIO_VO, - CFHSI_PRIO_BEBK, - CFHSI_PRIO_LAST, -}; - -struct cfhsi_config { - u32 inactivity_timeout; - u32 aggregation_timeout; - u32 head_align; - u32 tail_align; - u32 q_high_mark; - u32 q_low_mark; -}; - -/* Structure implemented by CAIF HSI drivers. */ -struct cfhsi { - struct caif_dev_common cfdev; - struct net_device *ndev; - struct platform_device *pdev; - struct sk_buff_head qhead[CFHSI_PRIO_LAST]; - struct cfhsi_cb_ops cb_ops; - struct cfhsi_ops *ops; - int tx_state; - struct cfhsi_rx_state rx_state; - struct cfhsi_config cfg; - int rx_len; - u8 *rx_ptr; - u8 *tx_buf; - u8 *rx_buf; - u8 *rx_flip_buf; - spinlock_t lock; - int flow_off_sent; - struct list_head list; - struct work_struct wake_up_work; - struct work_struct wake_down_work; - struct work_struct out_of_sync_work; - struct workqueue_struct *wq; - wait_queue_head_t wake_up_wait; - wait_queue_head_t wake_down_wait; - wait_queue_head_t flush_fifo_wait; - struct timer_list inactivity_timer; - struct timer_list rx_slowpath_timer; - - /* TX aggregation */ - int aggregation_len; - struct timer_list aggregation_timer; - - unsigned long bits; -}; -extern struct platform_driver cfhsi_driver; - -/** - * enum ifla_caif_hsi - CAIF HSI NetlinkRT parameters. - * @IFLA_CAIF_HSI_INACTIVITY_TOUT: Inactivity timeout before - * taking the HSI wakeline down, in milliseconds. - * When using RT Netlink to create, destroy or configure a CAIF HSI interface, - * enum ifla_caif_hsi is used to specify the configuration attributes. - */ -enum ifla_caif_hsi { - __IFLA_CAIF_HSI_UNSPEC, - __IFLA_CAIF_HSI_INACTIVITY_TOUT, - __IFLA_CAIF_HSI_AGGREGATION_TOUT, - __IFLA_CAIF_HSI_HEAD_ALIGN, - __IFLA_CAIF_HSI_TAIL_ALIGN, - __IFLA_CAIF_HSI_QHIGH_WATERMARK, - __IFLA_CAIF_HSI_QLOW_WATERMARK, - __IFLA_CAIF_HSI_MAX -}; - -struct cfhsi_ops *cfhsi_get_ops(void); - -#endif /* CAIF_HSI_H_ */ -- cgit v1.2.3-59-g8ed1b From 71158bb1f2d2da61385c58fc1114e1a1c19984ba Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 30 Jun 2021 13:42:13 +0200 Subject: tcp: consistently disable header prediction for mptcp The MPTCP receive path is hooked only into the TCP slow-path. The DSS presence allows plain MPTCP traffic to hit that consistently. Since commit e1ff9e82e2ea ("net: mptcp: improve fallback to TCP"), when an MPTCP socket falls back to TCP, it can hit the TCP receive fast-path, and delay or stop triggering the event notification. Address the issue explicitly disabling the header prediction for MPTCP sockets. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/200 Fixes: e1ff9e82e2ea ("net: mptcp: improve fallback to TCP") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index e668f1bf780d..17df9b047ee4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -686,6 +686,10 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { + /* mptcp hooks are only on the slow path */ + if (sk_is_mptcp((struct sock *)tp)) + return; + tp->pred_flags = htonl((tp->tcp_header_len << 26) | ntohl(TCP_FLAG_ACK) | snd_wnd); -- cgit v1.2.3-59-g8ed1b From a23f89a9990684a0ca0cac4a2857c15d338ebe2d Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 1 Jul 2021 08:02:24 +0300 Subject: netfilter: conntrack: nf_ct_gre_keymap_flush() removal nf_ct_gre_keymap_flush() is useless. It is called from nf_conntrack_cleanup_net_list() only and tries to remove nf_ct_gre_keymap entries from pernet gre keymap list. Though: a) at this point the list should already be empty, all its entries were deleted during the conntracks cleanup, because nf_conntrack_cleanup_net_list() executes nf_ct_iterate_cleanup(kill_all) before nf_conntrack_proto_pernet_fini(): nf_conntrack_cleanup_net_list +- nf_ct_iterate_cleanup | nf_ct_put | nf_conntrack_put | nf_conntrack_destroy | destroy_conntrack | destroy_gre_conntrack | nf_ct_gre_keymap_destroy `- nf_conntrack_proto_pernet_fini nf_ct_gre_keymap_flush b) Let's say we find that the keymap list is not empty. This means netns still has a conntrack associated with gre, in which case we should not free its memory, because this will lead to a double free and related crashes. However I doubt it could have gone unnoticed for years, obviously this does not happen in real life. So I think we can remove both nf_ct_gre_keymap_flush() and nf_conntrack_proto_pernet_fini(). Signed-off-by: Vasily Averin Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 1 - net/netfilter/nf_conntrack_core.c | 1 - net/netfilter/nf_conntrack_proto.c | 7 ------- net/netfilter/nf_conntrack_proto_gre.c | 13 ------------- 4 files changed, 22 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 09f2efea0b97..13807ea94cd2 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -30,7 +30,6 @@ void nf_conntrack_cleanup_net(struct net *net); void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list); void nf_conntrack_proto_pernet_init(struct net *net); -void nf_conntrack_proto_pernet_fini(struct net *net); int nf_conntrack_proto_init(void); void nf_conntrack_proto_fini(void); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 96ba19fc8155..085a11f1eb43 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2457,7 +2457,6 @@ i_see_dead_people: } list_for_each_entry(net, net_exit_list, exit_list) { - nf_conntrack_proto_pernet_fini(net); nf_conntrack_ecache_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 55647409a9be..8f7a9837349c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -697,13 +697,6 @@ void nf_conntrack_proto_pernet_init(struct net *net) #endif } -void nf_conntrack_proto_pernet_fini(struct net *net) -{ -#ifdef CONFIG_NF_CT_PROTO_GRE - nf_ct_gre_keymap_flush(net); -#endif -} - module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index db11e403d818..728eeb0aea87 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -55,19 +55,6 @@ static inline struct nf_gre_net *gre_pernet(struct net *net) return &net->ct.nf_ct_proto.gre; } -void nf_ct_gre_keymap_flush(struct net *net) -{ - struct nf_gre_net *net_gre = gre_pernet(net); - struct nf_ct_gre_keymap *km, *tmp; - - spin_lock_bh(&keymap_lock); - list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { - list_del_rcu(&km->list); - kfree_rcu(km, rcu); - } - spin_unlock_bh(&keymap_lock); -} - static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, const struct nf_conntrack_tuple *t) { -- cgit v1.2.3-59-g8ed1b From 40fc3054b45820c28ea3c65e2c86d041dc244a8a Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Fri, 2 Jul 2021 02:47:00 +0300 Subject: net: ipv6: fix return value of ip6_skb_dst_mtu Commit 628a5c561890 ("[INET]: Add IP(V6)_PMTUDISC_RPOBE") introduced ip6_skb_dst_mtu with return value of signed int which is inconsistent with actually returned values. Also 2 users of this function actually assign its value to unsigned int variable and only __xfrm6_output assigns result of this function to signed variable but actually uses as unsigned in further comparisons and calls. Change this function to return unsigned int value. Fixes: 628a5c561890 ("[INET]: Add IP(V6)_PMTUDISC_RPOBE") Reviewed-by: David Ahern Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- net/ipv6/xfrm6_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f14149df5a65..625a38ccb5d9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -263,7 +263,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); -static inline int ip6_skb_dst_mtu(struct sk_buff *skb) +static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { int mtu; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 57fa27c1cdf9..d0d280077721 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -49,7 +49,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; - int mtu; + unsigned int mtu; bool toobig; #ifdef CONFIG_NETFILTER -- cgit v1.2.3-59-g8ed1b From b2aae654a4794ef898ad33a179f341eb610f6b85 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Mon, 5 Jul 2021 18:26:54 +0800 Subject: net: stmmac: add mutex lock to protect est parameters Add a mutex lock to protect est structure parameters so that the EST parameters can be updated by other threads. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 12 +++++++++++- include/linux/stmmac.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 596626c71189..2e3cdf540168 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -796,14 +796,18 @@ static int tc_setup_taprio(struct stmmac_priv *priv, GFP_KERNEL); if (!plat->est) return -ENOMEM; + + mutex_init(&priv->plat->est->lock); } else { memset(plat->est, 0, sizeof(*plat->est)); } size = qopt->num_entries; + mutex_lock(&priv->plat->est->lock); priv->plat->est->gcl_size = size; priv->plat->est->enable = qopt->enable; + mutex_unlock(&priv->plat->est->lock); for (i = 0; i < size; i++) { s64 delta_ns = qopt->entries[i].interval; @@ -834,6 +838,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->gcl[i] = delta_ns | (gates << wid); } + mutex_lock(&priv->plat->est->lock); /* Adjust for real system time */ priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); current_time_ns = timespec64_to_ktime(current_time); @@ -847,8 +852,10 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); priv->plat->est->ctr[1] = (u32)ctr; - if (fpe && !priv->dma_cap.fpesel) + if (fpe && !priv->dma_cap.fpesel) { + mutex_unlock(&priv->plat->est->lock); return -EOPNOTSUPP; + } /* Actual FPE register configuration will be done after FPE handshake * is success. @@ -857,6 +864,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); if (ret) { netdev_err(priv->dev, "failed to configure EST\n"); goto disable; @@ -872,9 +880,11 @@ static int tc_setup_taprio(struct stmmac_priv *priv, return 0; disable: + mutex_lock(&priv->plat->est->lock); priv->plat->est->enable = false; stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); priv->plat->fpe_cfg->enable = false; stmmac_fpe_configure(priv, priv->ioaddr, diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d5ae621d66ba..09157b8a5810 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -115,6 +115,7 @@ struct stmmac_axi { #define EST_GCL 1024 struct stmmac_est { + struct mutex lock; int enable; u32 btr_offset[2]; u32 btr[2]; -- cgit v1.2.3-59-g8ed1b From e9e3720002f61cd637a49ecafae77cac230eefae Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Mon, 5 Jul 2021 18:26:55 +0800 Subject: net: stmmac: ptp: update tas basetime after ptp adjust After adjusting the ptp time, the Qbv base time may be the past time of the new current time. dwmac5 hardware limited the base time cannot be set as past time. This patch add a btr_reserve to store the base time get from qopt, then calculate the base time and reset the Qbv configuration after ptp time adjust. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 41 +++++++++++++++++++++++- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 6 +++- include/linux/stmmac.h | 1 + 3 files changed, 46 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 4e86cdf2bc9f..580cc035536b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -62,7 +62,8 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) u32 sec, nsec; u32 quotient, reminder; int neg_adj = 0; - bool xmac; + bool xmac, est_rst = false; + int ret; xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; @@ -75,10 +76,48 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) sec = quotient; nsec = reminder; + /* If EST is enabled, disabled it before adjust ptp time. */ + if (priv->plat->est && priv->plat->est->enable) { + est_rst = true; + mutex_lock(&priv->plat->est->lock); + priv->plat->est->enable = false; + stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); + } + spin_lock_irqsave(&priv->ptp_lock, flags); stmmac_adjust_systime(priv, priv->ptpaddr, sec, nsec, neg_adj, xmac); spin_unlock_irqrestore(&priv->ptp_lock, flags); + /* Caculate new basetime and re-configured EST after PTP time adjust. */ + if (est_rst) { + struct timespec64 current_time, time; + ktime_t current_time_ns, basetime; + u64 cycle_time; + + mutex_lock(&priv->plat->est->lock); + priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); + current_time_ns = timespec64_to_ktime(current_time); + time.tv_nsec = priv->plat->est->btr_reserve[0]; + time.tv_sec = priv->plat->est->btr_reserve[1]; + basetime = timespec64_to_ktime(time); + cycle_time = priv->plat->est->ctr[1] * NSEC_PER_SEC + + priv->plat->est->ctr[0]; + time = stmmac_calc_tas_basetime(basetime, + current_time_ns, + cycle_time); + + priv->plat->est->btr[0] = (u32)time.tv_nsec; + priv->plat->est->btr[1] = (u32)time.tv_sec; + priv->plat->est->enable = true; + ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); + if (ret) + netdev_err(priv->dev, "failed to configure EST\n"); + } + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 2e3cdf540168..4f3b6437b114 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -739,7 +739,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, { u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; struct plat_stmmacenet_data *plat = priv->plat; - struct timespec64 time, current_time; + struct timespec64 time, current_time, qopt_time; ktime_t current_time_ns; bool fpe = false; int i, ret = 0; @@ -848,6 +848,10 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->btr[0] = (u32)time.tv_nsec; priv->plat->est->btr[1] = (u32)time.tv_sec; + qopt_time = ktime_to_timespec64(qopt->base_time); + priv->plat->est->btr_reserve[0] = (u32)qopt_time.tv_nsec; + priv->plat->est->btr_reserve[1] = (u32)qopt_time.tv_sec; + ctr = qopt->cycle_time; priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); priv->plat->est->ctr[1] = (u32)ctr; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 09157b8a5810..a6f03b36fc4f 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -117,6 +117,7 @@ struct stmmac_axi { struct stmmac_est { struct mutex lock; int enable; + u32 btr_reserve[2]; u32 btr_offset[2]; u32 btr[2]; u32 ctr[2]; -- cgit v1.2.3-59-g8ed1b From 1da4cd82dd180224503e745ccf3220e3490d8897 Mon Sep 17 00:00:00 2001 From: Ali Abdallah Date: Thu, 27 May 2021 09:19:06 +0200 Subject: netfilter: conntrack: add new sysctl to disable RST check This patch adds a new sysctl tcp_ignore_invalid_rst to disable marking out of segments RSTs as INVALID. Signed-off-by: Ali Abdallah Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.rst | 6 ++++++ include/net/netns/conntrack.h | 1 + net/netfilter/nf_conntrack_proto_tcp.c | 6 +++++- net/netfilter/nf_conntrack_standalone.c | 10 ++++++++++ 4 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index 0467b30e4abe..d31ed6c1cb0d 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -110,6 +110,12 @@ nf_conntrack_tcp_be_liberal - BOOLEAN Be conservative in what you do, be liberal in what you accept from others. If it's non-zero, we mark only out of window RST segments as INVALID. +nf_conntrack_tcp_ignore_invalid_rst - BOOLEAN + - 0 - disabled (default) + - 1 - enabled + + If it's 1, we don't mark out of window RST segments as INVALID. + nf_conntrack_tcp_loose - BOOLEAN - 0 - disabled - not 0 - enabled (default) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index c3094b83a525..37e5300c7e5a 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -27,6 +27,7 @@ struct nf_tcp_net { u8 tcp_loose; u8 tcp_be_liberal; u8 tcp_max_retrans; + u8 tcp_ignore_invalid_rst; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) unsigned int offload_timeout; unsigned int offload_pickup; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b8ff67671e93..3259416f2ea4 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1068,7 +1068,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, if (seq == 0 && !nf_conntrack_tcp_established(ct)) break; - if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) { + if (before(seq, ct->proto.tcp.seen[!dir].td_maxack) && + !tn->tcp_ignore_invalid_rst) { /* Invalid RST */ spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst"); @@ -1466,6 +1467,9 @@ void nf_conntrack_tcp_init_net(struct net *net) */ tn->tcp_be_liberal = 0; + /* If it's non-zero, we turn off RST sequence number check */ + tn->tcp_ignore_invalid_rst = 0; + /* Max number of the retransmitted packets without receiving an (acceptable) * ACK from the destination. If this number is reached, a shorter timer * will be started. diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index f57a951c9b5e..214d9f9e499b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -579,6 +579,7 @@ enum nf_ct_sysctl_index { #endif NF_SYSCTL_CT_PROTO_TCP_LOOSE, NF_SYSCTL_CT_PROTO_TCP_LIBERAL, + NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST, NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, @@ -798,6 +799,14 @@ static struct ctl_table nf_ct_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + [NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST] = { + .procname = "nf_conntrack_tcp_ignore_invalid_rst", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { .procname = "nf_conntrack_tcp_max_retrans", .maxlen = sizeof(u8), @@ -1004,6 +1013,7 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, XASSIGN(LOOSE, &tn->tcp_loose); XASSIGN(LIBERAL, &tn->tcp_be_liberal); XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans); + XASSIGN(IGNORE_INVALID_RST, &tn->tcp_ignore_invalid_rst); #undef XASSIGN #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) -- cgit v1.2.3-59-g8ed1b From 9a5605505d9c7dbfdb89cc29a8f5fc5cf9fd2334 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 5 Jul 2021 15:38:12 +0000 Subject: bonding: Add struct bond_ipesc to manage SA bonding has been supporting ipsec offload. When SA is added, bonding just passes SA to its own active real interface. But it doesn't manage SA. So, when events(add/del real interface, active real interface change, etc) occur, bonding can't handle that well because It doesn't manage SA. So some problems(panic, UAF, refcnt leak)occur. In order to make it stable, it should manage SA. That's the reason why struct bond_ipsec is added. When a new SA is added to bonding interface, it is stored in the bond_ipsec list. And the SA is passed to a current active real interface. If events occur, it uses bond_ipsec data to handle these events. bond->ipsec_list is protected by bond->ipsec_lock. If a current active real interface is changed, the following logic works. 1. delete all SAs from old active real interface 2. Add all SAs to the new active real interface. 3. If a new active real interface doesn't support ipsec offload or SA's option, it sets real_dev to NULL. Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 139 ++++++++++++++++++++++++++++++++++------ include/net/bonding.h | 9 ++- 2 files changed, 127 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f7b89743fab9..165fa55cfb38 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -401,6 +401,7 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, static int bond_ipsec_add_sa(struct xfrm_state *xs) { struct net_device *bond_dev = xs->xso.dev; + struct bond_ipsec *ipsec; struct bonding *bond; struct slave *slave; int err; @@ -416,9 +417,6 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) return -ENODEV; } - xs->xso.real_dev = slave->dev; - bond->xs = xs; - if (!slave->dev->xfrmdev_ops || !slave->dev->xfrmdev_ops->xdo_dev_state_add || netif_is_bond_master(slave->dev)) { @@ -427,11 +425,63 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) return -EINVAL; } + ipsec = kmalloc(sizeof(*ipsec), GFP_ATOMIC); + if (!ipsec) { + rcu_read_unlock(); + return -ENOMEM; + } + xs->xso.real_dev = slave->dev; + err = slave->dev->xfrmdev_ops->xdo_dev_state_add(xs); + if (!err) { + ipsec->xs = xs; + INIT_LIST_HEAD(&ipsec->list); + spin_lock_bh(&bond->ipsec_lock); + list_add(&ipsec->list, &bond->ipsec_list); + spin_unlock_bh(&bond->ipsec_lock); + } else { + kfree(ipsec); + } rcu_read_unlock(); return err; } +static void bond_ipsec_add_sa_all(struct bonding *bond) +{ + struct net_device *bond_dev = bond->dev; + struct bond_ipsec *ipsec; + struct slave *slave; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (!slave) + goto out; + + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_add || + netif_is_bond_master(slave->dev)) { + spin_lock_bh(&bond->ipsec_lock); + if (!list_empty(&bond->ipsec_list)) + slave_warn(bond_dev, slave->dev, + "%s: no slave xdo_dev_state_add\n", + __func__); + spin_unlock_bh(&bond->ipsec_lock); + goto out; + } + + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + ipsec->xs->xso.real_dev = slave->dev; + if (slave->dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs)) { + slave_warn(bond_dev, slave->dev, "%s: failed to add SA\n", __func__); + ipsec->xs->xso.real_dev = NULL; + } + } + spin_unlock_bh(&bond->ipsec_lock); +out: + rcu_read_unlock(); +} + /** * bond_ipsec_del_sa - clear out this specific SA * @xs: pointer to transformer state struct @@ -439,6 +489,7 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs) static void bond_ipsec_del_sa(struct xfrm_state *xs) { struct net_device *bond_dev = xs->xso.dev; + struct bond_ipsec *ipsec; struct bonding *bond; struct slave *slave; @@ -452,7 +503,10 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) if (!slave) goto out; - xs->xso.real_dev = slave->dev; + if (!xs->xso.real_dev) + goto out; + + WARN_ON(xs->xso.real_dev != slave->dev); if (!slave->dev->xfrmdev_ops || !slave->dev->xfrmdev_ops->xdo_dev_state_delete || @@ -463,6 +517,48 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) slave->dev->xfrmdev_ops->xdo_dev_state_delete(xs); out: + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + if (ipsec->xs == xs) { + list_del(&ipsec->list); + kfree(ipsec); + break; + } + } + spin_unlock_bh(&bond->ipsec_lock); + rcu_read_unlock(); +} + +static void bond_ipsec_del_sa_all(struct bonding *bond) +{ + struct net_device *bond_dev = bond->dev; + struct bond_ipsec *ipsec; + struct slave *slave; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (!slave) { + rcu_read_unlock(); + return; + } + + spin_lock_bh(&bond->ipsec_lock); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + if (!ipsec->xs->xso.real_dev) + continue; + + if (!slave->dev->xfrmdev_ops || + !slave->dev->xfrmdev_ops->xdo_dev_state_delete || + netif_is_bond_master(slave->dev)) { + slave_warn(bond_dev, slave->dev, + "%s: no slave xdo_dev_state_delete\n", + __func__); + } else { + slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); + } + ipsec->xs->xso.real_dev = NULL; + } + spin_unlock_bh(&bond->ipsec_lock); rcu_read_unlock(); } @@ -474,22 +570,27 @@ out: static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) { struct net_device *bond_dev = xs->xso.dev; - struct bonding *bond = netdev_priv(bond_dev); - struct slave *curr_active = rcu_dereference(bond->curr_active_slave); - struct net_device *slave_dev = curr_active->dev; + struct net_device *real_dev; + struct slave *curr_active; + struct bonding *bond; + + bond = netdev_priv(bond_dev); + curr_active = rcu_dereference(bond->curr_active_slave); + real_dev = curr_active->dev; if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) return true; - if (!slave_dev->xfrmdev_ops || - !slave_dev->xfrmdev_ops->xdo_dev_offload_ok || - netif_is_bond_master(slave_dev)) { - slave_warn(bond_dev, slave_dev, "%s: no slave xdo_dev_offload_ok\n", __func__); + if (!xs->xso.real_dev) + return false; + + if (!real_dev->xfrmdev_ops || + !real_dev->xfrmdev_ops->xdo_dev_offload_ok || + netif_is_bond_master(real_dev)) { return false; } - xs->xso.real_dev = slave_dev; - return slave_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); + return real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); } static const struct xfrmdev_ops bond_xfrmdev_ops = { @@ -1006,8 +1107,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) return; #ifdef CONFIG_XFRM_OFFLOAD - if (old_active && bond->xs) - bond_ipsec_del_sa(bond->xs); + bond_ipsec_del_sa_all(bond); #endif /* CONFIG_XFRM_OFFLOAD */ if (new_active) { @@ -1082,10 +1182,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } #ifdef CONFIG_XFRM_OFFLOAD - if (new_active && bond->xs) { - xfrm_dev_state_flush(dev_net(bond->dev), bond->dev, true); - bond_ipsec_add_sa(bond->xs); - } + bond_ipsec_add_sa_all(bond); #endif /* CONFIG_XFRM_OFFLOAD */ /* resend IGMP joins since active slave has changed or @@ -3343,6 +3440,7 @@ static int bond_master_netdev_event(unsigned long event, return bond_event_changename(event_bond); case NETDEV_UNREGISTER: bond_remove_proc_entry(event_bond); + xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true); break; case NETDEV_REGISTER: bond_create_proc_entry(event_bond); @@ -4910,7 +5008,8 @@ void bond_setup(struct net_device *bond_dev) #ifdef CONFIG_XFRM_OFFLOAD /* set up xfrm device ops (only supported in active-backup right now) */ bond_dev->xfrmdev_ops = &bond_xfrmdev_ops; - bond->xs = NULL; + INIT_LIST_HEAD(&bond->ipsec_list); + spin_lock_init(&bond->ipsec_lock); #endif /* CONFIG_XFRM_OFFLOAD */ /* don't acquire bond device's netif_tx_lock when transmitting */ diff --git a/include/net/bonding.h b/include/net/bonding.h index 15335732e166..625d9c72dee3 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -201,6 +201,11 @@ struct bond_up_slave { */ #define BOND_LINK_NOCHANGE -1 +struct bond_ipsec { + struct list_head list; + struct xfrm_state *xs; +}; + /* * Here are the locking policies for the two bonding locks: * Get rcu_read_lock when reading or RTNL when writing slave list. @@ -249,7 +254,9 @@ struct bonding { #endif /* CONFIG_DEBUG_FS */ struct rtnl_link_stats64 bond_stats; #ifdef CONFIG_XFRM_OFFLOAD - struct xfrm_state *xs; + struct list_head ipsec_list; + /* protecting ipsec_list */ + spinlock_t ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ }; -- cgit v1.2.3-59-g8ed1b From d322957ebfb9c21c2c72b66680f7c3ccd724e081 Mon Sep 17 00:00:00 2001 From: Duncan Roe Date: Wed, 7 Jul 2021 10:57:51 +1000 Subject: netfilter: uapi: refer to nfnetlink_conntrack.h, not nf_conntrack_netlink.h nf_conntrack_netlink.h does not exist, refer to nfnetlink_conntrack.h instead. Signed-off-by: Duncan Roe Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_log.h | 2 +- include/uapi/linux/netfilter/nfnetlink_queue.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nfnetlink_log.h b/include/uapi/linux/netfilter/nfnetlink_log.h index 45c8d3b027e0..0af9c113d665 100644 --- a/include/uapi/linux/netfilter/nfnetlink_log.h +++ b/include/uapi/linux/netfilter/nfnetlink_log.h @@ -61,7 +61,7 @@ enum nfulnl_attr_type { NFULA_HWTYPE, /* hardware type */ NFULA_HWHEADER, /* hardware header */ NFULA_HWLEN, /* hardware header length */ - NFULA_CT, /* nf_conntrack_netlink.h */ + NFULA_CT, /* nfnetlink_conntrack.h */ NFULA_CT_INFO, /* enum ip_conntrack_info */ NFULA_VLAN, /* nested attribute: packet vlan info */ NFULA_L2HDR, /* full L2 header */ diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index bcb2cb5d40b9..aed90c4df0c8 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -51,11 +51,11 @@ enum nfqnl_attr_type { NFQA_IFINDEX_PHYSOUTDEV, /* __u32 ifindex */ NFQA_HWADDR, /* nfqnl_msg_packet_hw */ NFQA_PAYLOAD, /* opaque data payload */ - NFQA_CT, /* nf_conntrack_netlink.h */ + NFQA_CT, /* nfnetlink_conntrack.h */ NFQA_CT_INFO, /* enum ip_conntrack_info */ NFQA_CAP_LEN, /* __u32 length of captured packet */ NFQA_SKB_INFO, /* __u32 skb meta information */ - NFQA_EXP, /* nf_conntrack_netlink.h */ + NFQA_EXP, /* nfnetlink_conntrack.h */ NFQA_UID, /* __u32 sk uid */ NFQA_GID, /* __u32 sk gid */ NFQA_SECCTX, /* security context string */ -- cgit v1.2.3-59-g8ed1b From f263a81451c12da5a342d90572e317e611846f2c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 7 Jul 2021 15:38:47 -0700 Subject: bpf: Track subprog poke descriptors correctly and fix use-after-free Subprograms are calling map_poke_track(), but on program release there is no hook to call map_poke_untrack(). However, on program release, the aux memory (and poke descriptor table) is freed even though we still have a reference to it in the element list of the map aux data. When we run map_poke_run(), we then end up accessing free'd memory, triggering KASAN in prog_array_map_poke_run(): [...] [ 402.824689] BUG: KASAN: use-after-free in prog_array_map_poke_run+0xc2/0x34e [ 402.824698] Read of size 4 at addr ffff8881905a7940 by task hubble-fgs/4337 [ 402.824705] CPU: 1 PID: 4337 Comm: hubble-fgs Tainted: G I 5.12.0+ #399 [ 402.824715] Call Trace: [ 402.824719] dump_stack+0x93/0xc2 [ 402.824727] print_address_description.constprop.0+0x1a/0x140 [ 402.824736] ? prog_array_map_poke_run+0xc2/0x34e [ 402.824740] ? prog_array_map_poke_run+0xc2/0x34e [ 402.824744] kasan_report.cold+0x7c/0xd8 [ 402.824752] ? prog_array_map_poke_run+0xc2/0x34e [ 402.824757] prog_array_map_poke_run+0xc2/0x34e [ 402.824765] bpf_fd_array_map_update_elem+0x124/0x1a0 [...] The elements concerned are walked as follows: for (i = 0; i < elem->aux->size_poke_tab; i++) { poke = &elem->aux->poke_tab[i]; [...] The access to size_poke_tab is a 4 byte read, verified by checking offsets in the KASAN dump: [ 402.825004] The buggy address belongs to the object at ffff8881905a7800 which belongs to the cache kmalloc-1k of size 1024 [ 402.825008] The buggy address is located 320 bytes inside of 1024-byte region [ffff8881905a7800, ffff8881905a7c00) The pahole output of bpf_prog_aux: struct bpf_prog_aux { [...] /* --- cacheline 5 boundary (320 bytes) --- */ u32 size_poke_tab; /* 320 4 */ [...] In general, subprograms do not necessarily manage their own data structures. For example, BTF func_info and linfo are just pointers to the main program structure. This allows reference counting and cleanup to be done on the latter which simplifies their management a bit. The aux->poke_tab struct, however, did not follow this logic. The initial proposed fix for this use-after-free bug further embedded poke data tracking into the subprogram with proper reference counting. However, Daniel and Alexei questioned why we were treating these objects special; I agree, its unnecessary. The fix here removes the per subprogram poke table allocation and map tracking and instead simply points the aux->poke_tab pointer at the main programs poke table. This way, map tracking is simplified to the main program and we do not need to manage them per subprogram. This also means, bpf_prog_free_deferred(), which unwinds the program reference counting and kfrees objects, needs to ensure that we don't try to double free the poke_tab when free'ing the subprog structures. This is easily solved by NULL'ing the poke_tab pointer. The second detail is to ensure that per subprogram JIT logic only does fixups on poke_tab[] entries it owns. To do this, we add a pointer in the poke structure to point at the subprogram value so JITs can easily check while walking the poke_tab structure if the current entry belongs to the current program. The aux pointer is stable and therefore suitable for such comparison. On the jit_subprogs() error path, we omit cleaning up the poke->aux field because these are only ever referenced from the JIT side, but on error we will never make it to the JIT, so its fine to leave them dangling. Removing these pointers would complicate the error path for no reason. However, we do need to untrack all poke descriptors from the main program as otherwise they could race with the freeing of JIT memory from the subprograms. Lastly, a748c6975dea3 ("bpf: propagate poke descriptors to subprograms") had an off-by-one on the subprogram instruction index range check as it was testing 'insn_idx >= subprog_start && insn_idx <= subprog_end'. However, subprog_end is the next subprogram's start instruction. Fixes: a748c6975dea3 ("bpf: propagate poke descriptors to subprograms") Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210707223848.14580-2-john.fastabend@gmail.com --- arch/x86/net/bpf_jit_comp.c | 3 +++ include/linux/bpf.h | 1 + kernel/bpf/core.c | 8 +++++- kernel/bpf/verifier.c | 60 ++++++++++++++++----------------------------- 4 files changed, 32 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e835164189f1..4b951458c9fc 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -570,6 +570,9 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) for (i = 0; i < prog->aux->size_poke_tab; i++) { poke = &prog->aux->poke_tab[i]; + if (poke->aux && poke->aux != prog->aux) + continue; + WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable)); if (poke->reason != BPF_POKE_REASON_TAIL_CALL) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f309fc1509f2..e8e2b0393ca9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -780,6 +780,7 @@ struct bpf_jit_poke_descriptor { void *tailcall_target; void *tailcall_bypass; void *bypass_addr; + void *aux; union { struct { struct bpf_map *map; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 034ad93a1ad7..9b1577498373 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2236,8 +2236,14 @@ static void bpf_prog_free_deferred(struct work_struct *work) #endif if (aux->dst_trampoline) bpf_trampoline_put(aux->dst_trampoline); - for (i = 0; i < aux->func_cnt; i++) + for (i = 0; i < aux->func_cnt; i++) { + /* We can just unlink the subprog poke descriptor table as + * it was originally linked to the main program and is also + * released along with it. + */ + aux->func[i]->aux->poke_tab = NULL; bpf_jit_free(aux->func[i]); + } if (aux->func_cnt) { kfree(aux->func); bpf_prog_unlock_free(aux->prog); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index be38bb930bf1..42a4063de7cd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12121,33 +12121,19 @@ static int jit_subprogs(struct bpf_verifier_env *env) goto out_free; func[i]->is_func = 1; func[i]->aux->func_idx = i; - /* the btf and func_info will be freed only at prog->aux */ + /* Below members will be freed only at prog->aux */ func[i]->aux->btf = prog->aux->btf; func[i]->aux->func_info = prog->aux->func_info; + func[i]->aux->poke_tab = prog->aux->poke_tab; + func[i]->aux->size_poke_tab = prog->aux->size_poke_tab; for (j = 0; j < prog->aux->size_poke_tab; j++) { - u32 insn_idx = prog->aux->poke_tab[j].insn_idx; - int ret; + struct bpf_jit_poke_descriptor *poke; - if (!(insn_idx >= subprog_start && - insn_idx <= subprog_end)) - continue; - - ret = bpf_jit_add_poke_descriptor(func[i], - &prog->aux->poke_tab[j]); - if (ret < 0) { - verbose(env, "adding tail call poke descriptor failed\n"); - goto out_free; - } - - func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1; - - map_ptr = func[i]->aux->poke_tab[ret].tail_call.map; - ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux); - if (ret < 0) { - verbose(env, "tracking tail call prog failed\n"); - goto out_free; - } + poke = &prog->aux->poke_tab[j]; + if (poke->insn_idx < subprog_end && + poke->insn_idx >= subprog_start) + poke->aux = func[i]->aux; } /* Use bpf_prog_F_tag to indicate functions in stack traces. @@ -12178,18 +12164,6 @@ static int jit_subprogs(struct bpf_verifier_env *env) cond_resched(); } - /* Untrack main program's aux structs so that during map_poke_run() - * we will not stumble upon the unfilled poke descriptors; each - * of the main program's poke descs got distributed across subprogs - * and got tracked onto map, so we are sure that none of them will - * be missed after the operation below - */ - for (i = 0; i < prog->aux->size_poke_tab; i++) { - map_ptr = prog->aux->poke_tab[i].tail_call.map; - - map_ptr->ops->map_poke_untrack(map_ptr, prog->aux); - } - /* at this point all bpf functions were successfully JITed * now populate all bpf_calls with correct addresses and * run last pass of JIT @@ -12267,14 +12241,22 @@ static int jit_subprogs(struct bpf_verifier_env *env) bpf_prog_jit_attempt_done(prog); return 0; out_free: + /* We failed JIT'ing, so at this point we need to unregister poke + * descriptors from subprogs, so that kernel is not attempting to + * patch it anymore as we're freeing the subprog JIT memory. + */ + for (i = 0; i < prog->aux->size_poke_tab; i++) { + map_ptr = prog->aux->poke_tab[i].tail_call.map; + map_ptr->ops->map_poke_untrack(map_ptr, prog->aux); + } + /* At this point we're guaranteed that poke descriptors are not + * live anymore. We can just unlink its descriptor table as it's + * released with the main prog. + */ for (i = 0; i < env->subprog_cnt; i++) { if (!func[i]) continue; - - for (j = 0; j < func[i]->aux->size_poke_tab; j++) { - map_ptr = func[i]->aux->poke_tab[j].tail_call.map; - map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux); - } + func[i]->aux->poke_tab = NULL; bpf_jit_free(func[i]); } kfree(func); -- cgit v1.2.3-59-g8ed1b From 67a9c94317402b826fc3db32afc8f39336803d97 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 9 Jul 2021 17:35:18 +0000 Subject: net: validate lwtstate->data before returning from skb_tunnel_info() skb_tunnel_info() returns pointer of lwtstate->data as ip_tunnel_info type without validation. lwtstate->data can have various types such as mpls_iptunnel_encap, etc and these are not compatible. So skb_tunnel_info() should validate before returning that pointer. Splat looks like: BUG: KASAN: slab-out-of-bounds in vxlan_get_route+0x418/0x4b0 [vxlan] Read of size 2 at addr ffff888106ec2698 by task ping/811 CPU: 1 PID: 811 Comm: ping Not tainted 5.13.0+ #1195 Call Trace: dump_stack_lvl+0x56/0x7b print_address_description.constprop.8.cold.13+0x13/0x2ee ? vxlan_get_route+0x418/0x4b0 [vxlan] ? vxlan_get_route+0x418/0x4b0 [vxlan] kasan_report.cold.14+0x83/0xdf ? vxlan_get_route+0x418/0x4b0 [vxlan] vxlan_get_route+0x418/0x4b0 [vxlan] [ ... ] vxlan_xmit_one+0x148b/0x32b0 [vxlan] [ ... ] vxlan_xmit+0x25c5/0x4780 [vxlan] [ ... ] dev_hard_start_xmit+0x1ae/0x6e0 __dev_queue_xmit+0x1f39/0x31a0 [ ... ] neigh_xmit+0x2f9/0x940 mpls_xmit+0x911/0x1600 [mpls_iptunnel] lwtunnel_xmit+0x18f/0x450 ip_finish_output2+0x867/0x2040 [ ... ] Fixes: 61adedf3e3f1 ("route: move lwtunnel state to dst_entry") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 56cb3c38569a..14efa0ded75d 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -45,7 +45,9 @@ skb_tunnel_info(const struct sk_buff *skb) return &md_dst->u.tun_info; dst = skb_dst(skb); - if (dst && dst->lwtstate) + if (dst && dst->lwtstate && + (dst->lwtstate->type == LWTUNNEL_ENCAP_IP || + dst->lwtstate->type == LWTUNNEL_ENCAP_IP6)) return lwt_tun_info(dst->lwtstate); return NULL; -- cgit v1.2.3-59-g8ed1b From 6787b7e350d3552651a3422d3d8980fbc8d65368 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Fri, 9 Jul 2021 17:20:49 -0700 Subject: mptcp: avoid processing packet if a subflow reset If check_fully_established() causes a subflow reset, it should not continue to process the packet in tcp_data_queue(). Add a return value to mptcp_incoming_options(), and return false if a subflow has been reset, else return true. Then drop the packet in tcp_data_queue()/tcp_rcv_state_process() if mptcp_incoming_options() return false. Fixes: d582484726c4 ("mptcp: fix fallback for MP_JOIN subflows") Signed-off-by: Jianguo Wu Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/mptcp.h | 5 +++-- net/ipv4/tcp_input.c | 19 +++++++++++++++---- net/mptcp/options.c | 19 +++++++++++++------ 3 files changed, 31 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index cb580b06152f..8b5af683a818 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -105,7 +105,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts); -void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); +bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts); @@ -227,9 +227,10 @@ static inline bool mptcp_established_options(struct sock *sk, return false; } -static inline void mptcp_incoming_options(struct sock *sk, +static inline bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) { + return true; } static inline void mptcp_skb_ext_move(struct sk_buff *to, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a5a8d0a378b2..149ceb5c94ff 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4247,6 +4247,9 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) { trace_tcp_receive_reset(sk); + /* mptcp can't tell us to ignore reset pkts, + * so just ignore the return value of mptcp_incoming_options(). + */ if (sk_is_mptcp(sk)) mptcp_incoming_options(sk, skb); @@ -4941,8 +4944,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) bool fragstolen; int eaten; - if (sk_is_mptcp(sk)) - mptcp_incoming_options(sk, skb); + /* If a subflow has been reset, the packet should not continue + * to be processed, drop the packet. + */ + if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) { + __kfree_skb(skb); + return; + } if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { __kfree_skb(skb); @@ -6523,8 +6531,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_CLOSING: case TCP_LAST_ACK: if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - if (sk_is_mptcp(sk)) - mptcp_incoming_options(sk, skb); + /* If a subflow has been reset, the packet should not + * continue to be processed, drop the packet. + */ + if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) + goto discard; break; } fallthrough; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index b5850afea343..4452455aef7f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1035,7 +1035,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, return hmac == mp_opt->ahmac; } -void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) +/* Return false if a subflow has been reset, else return true */ +bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); @@ -1053,12 +1054,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) __mptcp_check_push(subflow->conn, sk); __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); - return; + return true; } mptcp_get_options(sk, skb, &mp_opt); + + /* The subflow can be in close state only if check_fully_established() + * just sent a reset. If so, tell the caller to ignore the current packet. + */ if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) - return; + return sk->sk_state != TCP_CLOSE; if (mp_opt.fastclose && msk->local_key == mp_opt.rcvr_key) { @@ -1100,7 +1105,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) } if (!mp_opt.dss) - return; + return true; /* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck @@ -1119,12 +1124,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) schedule_work(&msk->work)) sock_hold(subflow->conn); - return; + return true; } mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) - return; + return true; memset(mpext, 0, sizeof(*mpext)); @@ -1153,6 +1158,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) if (mpext->csum_reqd) mpext->csum = mp_opt.csum; } + + return true; } static void mptcp_set_rwin(const struct tcp_sock *tp) -- cgit v1.2.3-59-g8ed1b From a5de4be0aaaa66a2fa98e8a33bdbed3bd0682804 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Sun, 11 Jul 2021 18:38:15 +0200 Subject: net: phy: marvell10g: fix differentiation of 88X3310 from 88X3340 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that we cannot differentiate 88X3310 from 88X3340 by simply looking at bit 3 of revision ID. This only works on revisions A0 and A1. On revision B0, this bit is always 1. Instead use the 3.d00d register for differentiation, since this register contains information about number of ports on the device. Fixes: 9885d016ffa9 ("net: phy: marvell10g: add separate structure for 88X3340") Signed-off-by: Marek Behún Reported-by: Matteo Croce Tested-by: Matteo Croce Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 40 +++++++++++++++++++++++++++++++++++----- include/linux/marvell_phy.h | 6 +----- 2 files changed, 36 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index bbbc6ac8fa82..53a433442803 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -78,6 +78,11 @@ enum { /* Temperature read register (88E2110 only) */ MV_PCS_TEMP = 0x8042, + /* Number of ports on the device */ + MV_PCS_PORT_INFO = 0xd00d, + MV_PCS_PORT_INFO_NPORTS_MASK = 0x0380, + MV_PCS_PORT_INFO_NPORTS_SHIFT = 7, + /* These registers appear at 0x800X and 0xa00X - the 0xa00X control * registers appear to set themselves to the 0x800X when AN is * restarted, but status registers appear readable from either. @@ -966,6 +971,30 @@ static const struct mv3310_chip mv2111_type = { #endif }; +static int mv3310_get_number_of_ports(struct phy_device *phydev) +{ + int ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_PCS_PORT_INFO); + if (ret < 0) + return ret; + + ret &= MV_PCS_PORT_INFO_NPORTS_MASK; + ret >>= MV_PCS_PORT_INFO_NPORTS_SHIFT; + + return ret + 1; +} + +static int mv3310_match_phy_device(struct phy_device *phydev) +{ + return mv3310_get_number_of_ports(phydev) == 1; +} + +static int mv3340_match_phy_device(struct phy_device *phydev) +{ + return mv3310_get_number_of_ports(phydev) == 4; +} + static int mv211x_match_phy_device(struct phy_device *phydev, bool has_5g) { int val; @@ -994,7 +1023,8 @@ static int mv2111_match_phy_device(struct phy_device *phydev) static struct phy_driver mv3310_drivers[] = { { .phy_id = MARVELL_PHY_ID_88X3310, - .phy_id_mask = MARVELL_PHY_ID_88X33X0_MASK, + .phy_id_mask = MARVELL_PHY_ID_MASK, + .match_phy_device = mv3310_match_phy_device, .name = "mv88x3310", .driver_data = &mv3310_type, .get_features = mv3310_get_features, @@ -1011,8 +1041,9 @@ static struct phy_driver mv3310_drivers[] = { .set_loopback = genphy_c45_loopback, }, { - .phy_id = MARVELL_PHY_ID_88X3340, - .phy_id_mask = MARVELL_PHY_ID_88X33X0_MASK, + .phy_id = MARVELL_PHY_ID_88X3310, + .phy_id_mask = MARVELL_PHY_ID_MASK, + .match_phy_device = mv3340_match_phy_device, .name = "mv88x3340", .driver_data = &mv3340_type, .get_features = mv3310_get_features, @@ -1069,8 +1100,7 @@ static struct phy_driver mv3310_drivers[] = { module_phy_driver(mv3310_drivers); static struct mdio_device_id __maybe_unused mv3310_tbl[] = { - { MARVELL_PHY_ID_88X3310, MARVELL_PHY_ID_88X33X0_MASK }, - { MARVELL_PHY_ID_88X3340, MARVELL_PHY_ID_88X33X0_MASK }, + { MARVELL_PHY_ID_88X3310, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E2110, MARVELL_PHY_ID_MASK }, { }, }; diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index acee44b9db26..0f06c2287b52 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -22,14 +22,10 @@ #define MARVELL_PHY_ID_88E1545 0x01410ea0 #define MARVELL_PHY_ID_88E1548P 0x01410ec0 #define MARVELL_PHY_ID_88E3016 0x01410e60 +#define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 #define MARVELL_PHY_ID_88X2222 0x01410f10 -/* PHY IDs and mask for Alaska 10G PHYs */ -#define MARVELL_PHY_ID_88X33X0_MASK 0xfffffff8 -#define MARVELL_PHY_ID_88X3310 0x002b09a0 -#define MARVELL_PHY_ID_88X3340 0x002b09a8 - /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 -- cgit v1.2.3-59-g8ed1b