From cc5453a5b7e90c39f713091a7ebc53c1f87d1700 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Aug 2020 16:15:58 +0200 Subject: netfilter: conntrack: allow sctp hearbeat after connection re-use If an sctp connection gets re-used, heartbeats are flagged as invalid because their vtag doesn't match. Handle this in a similar way as TCP conntrack when it suspects that the endpoints and conntrack are out-of-sync. When a HEARTBEAT request fails its vtag validation, flag this in the conntrack state and accept the packet. When a HEARTBEAT_ACK is received with an invalid vtag in the reverse direction after we allowed such a HEARTBEAT through, assume we are out-of-sync and re-set the vtag info. v2: remove left-over snippet from an older incarnation that moved new_state/old_state assignments, thats not needed so keep that as-is. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_sctp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h index 9a33f171aa82..625f491b95de 100644 --- a/include/linux/netfilter/nf_conntrack_sctp.h +++ b/include/linux/netfilter/nf_conntrack_sctp.h @@ -9,6 +9,8 @@ struct ip_ct_sctp { enum sctp_conntrack state; __be32 vtag[IP_CT_DIR_MAX]; + u8 last_dir; + u8 flags; }; #endif /* _NF_CONNTRACK_SCTP_H */ -- cgit v1.2.3-59-g8ed1b From 4d2d4ce001f283ed8127173543b4cfb65641e357 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Aug 2020 19:06:01 +0200 Subject: KVM: arm64: Drop type input from kvm_put_guest We can use typeof() to avoid the need for the type input. Suggested-by: Marc Zyngier Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200804170604.42662-4-drjones@redhat.com --- arch/arm64/kvm/pvtime.c | 2 +- include/linux/kvm_host.h | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index 95f9580275b1..241ded7ee0ad 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -32,7 +32,7 @@ void kvm_update_stolen_time(struct kvm_vcpu *vcpu) steal_le = cpu_to_le64(steal); idx = srcu_read_lock(&kvm->srcu); offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); - kvm_put_guest(kvm, base + offset, steal_le, u64); + kvm_put_guest(kvm, base + offset, steal_le); srcu_read_unlock(&kvm->srcu, idx); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a23076765b4c..84371fb06209 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -749,25 +749,26 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); -#define __kvm_put_guest(kvm, gfn, offset, value, type) \ +#define __kvm_put_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \ - type __user *__uaddr = (type __user *)(__addr + offset); \ + typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ int __ret = -EFAULT; \ \ if (!kvm_is_error_hva(__addr)) \ - __ret = put_user(value, __uaddr); \ + __ret = put_user(v, __uaddr); \ if (!__ret) \ mark_page_dirty(kvm, gfn); \ __ret; \ }) -#define kvm_put_guest(kvm, gpa, value, type) \ +#define kvm_put_guest(kvm, gpa, v) \ ({ \ gpa_t __gpa = gpa; \ struct kvm *__kvm = kvm; \ + \ __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \ - offset_in_page(__gpa), (value), type); \ + offset_in_page(__gpa), v); \ }) int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); -- cgit v1.2.3-59-g8ed1b From 53f985584e3c2ebe5f2455530fbf87a001528db8 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Aug 2020 19:06:02 +0200 Subject: KVM: arm64: pvtime: Fix stolen time accounting across migration When updating the stolen time we should always read the current stolen time from the user provided memory, not from a kernel cache. If we use a cache then we'll end up resetting stolen time to zero on the first update after migration. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200804170604.42662-5-drjones@redhat.com --- arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/pvtime.c | 23 +++++++++-------------- include/linux/kvm_host.h | 20 ++++++++++++++++++++ 3 files changed, 29 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 65568b23868a..dd9c3b25aa1e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -368,7 +368,6 @@ struct kvm_vcpu_arch { /* Guest PV state */ struct { - u64 steal; u64 last_steal; gpa_t base; } steal; diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index 241ded7ee0ad..75234321d896 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -13,26 +13,22 @@ void kvm_update_stolen_time(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; + u64 base = vcpu->arch.steal.base; u64 last_steal = vcpu->arch.steal.last_steal; - u64 steal; - __le64 steal_le; - u64 offset; + u64 offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); + u64 steal = 0; int idx; - u64 base = vcpu->arch.steal.base; if (base == GPA_INVALID) return; - /* Let's do the local bookkeeping */ - steal = vcpu->arch.steal.steal; - vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay); - steal += vcpu->arch.steal.last_steal - last_steal; - vcpu->arch.steal.steal = steal; - - steal_le = cpu_to_le64(steal); idx = srcu_read_lock(&kvm->srcu); - offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); - kvm_put_guest(kvm, base + offset, steal_le); + if (!kvm_get_guest(kvm, base + offset, steal)) { + steal = le64_to_cpu(steal); + vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay); + steal += vcpu->arch.steal.last_steal - last_steal; + kvm_put_guest(kvm, base + offset, cpu_to_le64(steal)); + } srcu_read_unlock(&kvm->srcu, idx); } @@ -66,7 +62,6 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) * Start counting stolen time from the time the guest requests * the feature enabled. */ - vcpu->arch.steal.steal = 0; vcpu->arch.steal.last_steal = current->sched_info.run_delay; idx = srcu_read_lock(&kvm->srcu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 84371fb06209..05e3c2fb3ef7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -749,6 +749,26 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); +#define __kvm_get_guest(kvm, gfn, offset, v) \ +({ \ + unsigned long __addr = gfn_to_hva(kvm, gfn); \ + typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ + int __ret = -EFAULT; \ + \ + if (!kvm_is_error_hva(__addr)) \ + __ret = get_user(v, __uaddr); \ + __ret; \ +}) + +#define kvm_get_guest(kvm, gpa, v) \ +({ \ + gpa_t __gpa = gpa; \ + struct kvm *__kvm = kvm; \ + \ + __kvm_get_guest(__kvm, __gpa >> PAGE_SHIFT, \ + offset_in_page(__gpa), v); \ +}) + #define __kvm_put_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \ -- cgit v1.2.3-59-g8ed1b From be769db2f95861cc8c7c8fedcc71a8c39b803b10 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 22 Aug 2020 08:23:29 +1000 Subject: net: Get rid of consume_skb when tracing is off The function consume_skb is only meaningful when tracing is enabled. This patch makes it conditional on CONFIG_TRACEPOINTS. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++++ net/core/skbuff.c | 2 ++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 46881d902124..e8bca74857a3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1056,7 +1056,16 @@ void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); + +#ifdef CONFIG_TRACEPOINTS void consume_skb(struct sk_buff *skb); +#else +static inline void consume_skb(struct sk_buff *skb) +{ + return kfree_skb(skb); +} +#endif + void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); extern struct kmem_cache *skbuff_head_cache; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e18184ffa9c3..6faf73d6a0f7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -820,6 +820,7 @@ void skb_tx_error(struct sk_buff *skb) } EXPORT_SYMBOL(skb_tx_error); +#ifdef CONFIG_TRACEPOINTS /** * consume_skb - free an skbuff * @skb: buffer to free @@ -837,6 +838,7 @@ void consume_skb(struct sk_buff *skb) __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); +#endif /** * consume_stateless_skb - free an skbuff, assuming it is stateless -- cgit v1.2.3-59-g8ed1b From aac544c3553d98ebe150dda19a25aa253f7ad3fe Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 25 Aug 2020 01:25:11 +0200 Subject: Compiler Attributes: remove comment about sparse not supporting __has_attribute Sparse supports __has_attribute() since 2018-08-31, so the comment is not true anymore but more importantly is rather confusing. So remove it. Signed-off-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 6122efdad6ad..af7a58c19e20 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -24,12 +24,6 @@ * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17. * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute * by hand. - * - * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__ - * depending on the compiler used to build it; however, these attributes have - * no semantic effects for sparse, so it does not matter. Also note that, - * in order to avoid sparse's warnings, even the unsupported ones must be - * defined to 0. */ #ifndef __has_attribute # define __has_attribute(x) __GCC4_has_attribute_##x -- cgit v1.2.3-59-g8ed1b From 5861af92ff2a2e002449191413c35f3ec5f721fe Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 25 Aug 2020 01:25:26 +0200 Subject: Compiler Attributes: fix comment concerning GCC 4.6 GCC 4.6 is not supported anymore, so remove a reference to it, leaving just the part about version prior GCC 5. Signed-off-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index af7a58c19e20..ea7b756b1c8f 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -22,7 +22,7 @@ /* * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17. - * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute + * In the meantime, to support gcc < 5, we implement __has_attribute * by hand. */ #ifndef __has_attribute -- cgit v1.2.3-59-g8ed1b From 645f08975f49441b3e753d8dc5b740cbcb226594 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 27 Aug 2020 07:27:49 -0400 Subject: net: Fix some comments Fix some comments, including wrong function name, duplicated word and so on. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- include/uapi/linux/in.h | 2 +- net/core/sock.c | 2 +- net/ipv4/raw.c | 2 +- net/l3mdev/l3mdev.c | 2 +- net/socket.c | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e8bca74857a3..8d9ab50b08c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -71,7 +71,7 @@ * NETIF_F_IPV6_CSUM - Driver (device) is only able to checksum plain * TCP or UDP packets over IPv6. These are specifically * unencapsulated packets of the form IPv6|TCP or - * IPv4|UDP where the Next Header field in the IPv6 + * IPv6|UDP where the Next Header field in the IPv6 * header is either TCP or UDP. IPv6 extension headers * are not supported with this feature. This feature * cannot be set in features for a device with @@ -2667,7 +2667,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) * * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS) * to reduce average number of cache lines per packet. - * get_rps_cpus() for example only access one 64 bytes aligned block : + * get_rps_cpu() for example only access one 64 bytes aligned block : * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) */ #ifndef NET_SKB_PAD diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 3d0d8231dc19..7d6687618d80 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -135,7 +135,7 @@ struct in_addr { * this socket to prevent accepting spoofed ones. */ #define IP_PMTUDISC_INTERFACE 4 -/* weaker version of IP_PMTUDISC_INTERFACE, which allos packets to get +/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get * fragmented if they exeed the interface mtu */ #define IP_PMTUDISC_OMIT 5 diff --git a/net/core/sock.c b/net/core/sock.c index e4f40b175acb..8eb2c924805a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3254,7 +3254,7 @@ void sk_common_release(struct sock *sk) sk->sk_prot->destroy(sk); /* - * Observation: when sock_common_release is called, processes have + * Observation: when sk_common_release is called, processes have * no access to socket. But net still has. * Step one, detach it from networking: * diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 6fd4330287c2..407956be7deb 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -610,7 +610,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } else if (!ipc.oif) { ipc.oif = inet->uc_index; } else if (ipv4_is_lbcast(daddr) && inet->uc_index) { - /* oif is set, packet is to local broadcast and + /* oif is set, packet is to local broadcast * and uc_index is set. oif is most likely set * by sk_bound_dev_if. If uc_index != oif check if the * oif is an L3 master and uc_index is an L3 slave. diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index e71ca5aec684..864326f150e2 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -154,7 +154,7 @@ int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex) EXPORT_SYMBOL_GPL(l3mdev_master_upper_ifindex_by_index_rcu); /** - * l3mdev_fib_table - get FIB table id associated with an L3 + * l3mdev_fib_table_rcu - get FIB table id associated with an L3 * master interface * @dev: targeted interface */ diff --git a/net/socket.c b/net/socket.c index dbbe8ea7d395..0c0144604f81 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3610,7 +3610,7 @@ int kernel_getsockname(struct socket *sock, struct sockaddr *addr) EXPORT_SYMBOL(kernel_getsockname); /** - * kernel_peername - get the address which the socket is connected (kernel space) + * kernel_getpeername - get the address which the socket is connected (kernel space) * @sock: socket * @addr: address holder * @@ -3671,7 +3671,7 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, EXPORT_SYMBOL(kernel_sendpage_locked); /** - * kernel_shutdown - shut down part of a full-duplex connection (kernel space) + * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space) * @sock: socket * @how: connection part * -- cgit v1.2.3-59-g8ed1b From ee921183557af39c1a0475f982d43b0fcac25e2e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 23 Aug 2020 13:55:36 +0200 Subject: netfilter: nfnetlink: nfnetlink_unicast() reports EAGAIN instead of ENOBUFS Frontend callback reports EAGAIN to nfnetlink to retry a command, this is used to signal that module autoloading is required. Unfortunately, nlmsg_unicast() reports EAGAIN in case the receiver socket buffer gets full, so it enters a busy-loop. This patch updates nfnetlink_unicast() to turn EAGAIN into ENOBUFS and to use nlmsg_unicast(). Remove the flags field in nfnetlink_unicast() since this is always MSG_DONTWAIT in the existing code which is exactly what nlmsg_unicast() passes to netlink_unicast() as parameter. Fixes: 96518518cc41 ("netfilter: add nftables") Reported-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 3 +- net/netfilter/nf_tables_api.c | 61 ++++++++++++++++++------------------- net/netfilter/nfnetlink.c | 11 +++++-- net/netfilter/nfnetlink_log.c | 3 +- net/netfilter/nfnetlink_queue.c | 2 +- 5 files changed, 40 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 851425c3178f..89016d08f6a2 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -43,8 +43,7 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group); int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags); int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); -int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, - int flags); +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid); static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 71e501c5ad21..b7dc1cbf40ea 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -815,11 +815,11 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0, family, table); if (err < 0) - goto err; + goto err_fill_table_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); -err: +err_fill_table_info: kfree_skb(skb2); return err; } @@ -1563,11 +1563,11 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0, family, table, chain); if (err < 0) - goto err; + goto err_fill_chain_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); -err: +err_fill_chain_info: kfree_skb(skb2); return err; } @@ -3008,11 +3008,11 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, family, table, chain, rule, NULL); if (err < 0) - goto err; + goto err_fill_rule_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); -err: +err_fill_rule_info: kfree_skb(skb2); return err; } @@ -3968,11 +3968,11 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0); if (err < 0) - goto err; + goto err_fill_set_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); -err: +err_fill_set_info: kfree_skb(skb2); return err; } @@ -4860,24 +4860,18 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -ENOMEM; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb == NULL) - goto err1; + return err; err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid, NFT_MSG_NEWSETELEM, 0, set, &elem); if (err < 0) - goto err2; + goto err_fill_setelem; - err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT); - /* This avoids a loop in nfnetlink. */ - if (err < 0) - goto err1; + return nfnetlink_unicast(skb, ctx->net, ctx->portid); - return 0; -err2: +err_fill_setelem: kfree_skb(skb); -err1: - /* this avoids a loop in nfnetlink. */ - return err == -EAGAIN ? -ENOBUFS : err; + return err; } /* called with rcu_read_lock held */ @@ -6182,10 +6176,11 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, family, table, obj, reset); if (err < 0) - goto err; + goto err_fill_obj_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); -err: + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + +err_fill_obj_info: kfree_skb(skb2); return err; } @@ -7045,10 +7040,11 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, NFT_MSG_NEWFLOWTABLE, 0, family, flowtable, &flowtable->hook_list); if (err < 0) - goto err; + goto err_fill_flowtable_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); -err: + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + +err_fill_flowtable_info: kfree_skb(skb2); return err; } @@ -7234,10 +7230,11 @@ static int nf_tables_getgen(struct net *net, struct sock *nlsk, err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq); if (err < 0) - goto err; + goto err_fill_gen_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); -err: + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + +err_fill_gen_info: kfree_skb(skb2); return err; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 5f24edf95830..3a2e64e13b22 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -149,10 +149,15 @@ int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error) } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, - int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid) { - return netlink_unicast(net->nfnl, skb, portid, flags); + int err; + + err = nlmsg_unicast(net->nfnl, skb, portid); + if (err == -EAGAIN) + err = -ENOBUFS; + + return err; } EXPORT_SYMBOL_GPL(nfnetlink_unicast); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f02992419850..b35e8d9a5b37 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -356,8 +356,7 @@ __nfulnl_send(struct nfulnl_instance *inst) goto out; } } - nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, - MSG_DONTWAIT); + nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid); out: inst->qlen = 0; inst->skb = NULL; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index dadfc06245a3..d1d8bca03b4f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -681,7 +681,7 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, *packet_id_ptr = htonl(entry->id); /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, net, queue->peer_portid); if (err < 0) { if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { failopen = 1; -- cgit v1.2.3-59-g8ed1b From e5fc436f06eef54ef512ea55a9db8eb9f2e76959 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Fri, 28 Aug 2020 10:53:01 +0200 Subject: sparse: use static inline for __chk_{user,io}_ptr() __chk_user_ptr() & __chk_io_ptr() are dummy extern functions which only exist to enforce the typechecking of __user or __iomem pointers in macros when using sparse. This typechecking is done by inserting a call to these functions. But the presence of these calls can inhibit some simplifications and so influence the result of sparse's analysis of context/locking. Fix this by changing these calls into static inline calls with an empty body. Signed-off-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 4b33cb385f96..6e390d58a9f8 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -11,8 +11,8 @@ # define __iomem __attribute__((noderef, address_space(__iomem))) # define __percpu __attribute__((noderef, address_space(__percpu))) # define __rcu __attribute__((noderef, address_space(__rcu))) -extern void __chk_user_ptr(const volatile void __user *); -extern void __chk_io_ptr(const volatile void __iomem *); +static inline void __chk_user_ptr(const volatile void __user *ptr) { } +static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* context/locking */ # define __must_hold(x) __attribute__((context(x,1,1))) # define __acquires(x) __attribute__((context(x,0,1))) -- cgit v1.2.3-59-g8ed1b From 35556bed836f8dc07ac55f69c8d17dce3e7f0e25 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Sep 2020 10:52:33 +0100 Subject: HID: core: Sanitize event code and type when mapping input When calling into hid_map_usage(), the passed event code is blindly stored as is, even if it doesn't fit in the associated bitmap. This event code can come from a variety of sources, including devices masquerading as input devices, only a bit more "programmable". Instead of taking the event code at face value, check that it actually fits the corresponding bitmap, and if it doesn't: - spit out a warning so that we know which device is acting up - NULLify the bitmap pointer so that we catch unexpected uses Code paths that can make use of untrusted inputs can now check that the mapping was indeed correct and bail out if not. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-input.c | 4 ++++ drivers/hid/hid-multitouch.c | 2 ++ include/linux/hid.h | 42 +++++++++++++++++++++++++++++------------- 3 files changed, 35 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index b8eabf206e74..88e19996427e 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1132,6 +1132,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel } mapped: + /* Mapping failed, bail out */ + if (!bit) + return; + if (device->driver->input_mapped && device->driver->input_mapped(device, hidinput, field, usage, &bit, &max) < 0) { diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 3f94b4954225..e3152155c4b8 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -856,6 +856,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, code = BTN_0 + ((usage->hid - 1) & HID_USAGE); hid_map_usage(hi, usage, bit, max, EV_KEY, code); + if (!*bit) + return -1; input_set_capability(hi->input, EV_KEY, code); return 1; diff --git a/include/linux/hid.h b/include/linux/hid.h index 875f71132b14..c7044a14200e 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -959,34 +959,49 @@ static inline void hid_device_io_stop(struct hid_device *hid) { * @max: maximal valid usage->code to consider later (out parameter) * @type: input event type (EV_KEY, EV_REL, ...) * @c: code which corresponds to this usage and type + * + * The value pointed to by @bit will be set to NULL if either @type is + * an unhandled event type, or if @c is out of range for @type. This + * can be used as an error condition. */ static inline void hid_map_usage(struct hid_input *hidinput, struct hid_usage *usage, unsigned long **bit, int *max, - __u8 type, __u16 c) + __u8 type, unsigned int c) { struct input_dev *input = hidinput->input; - - usage->type = type; - usage->code = c; + unsigned long *bmap = NULL; + unsigned int limit = 0; switch (type) { case EV_ABS: - *bit = input->absbit; - *max = ABS_MAX; + bmap = input->absbit; + limit = ABS_MAX; break; case EV_REL: - *bit = input->relbit; - *max = REL_MAX; + bmap = input->relbit; + limit = REL_MAX; break; case EV_KEY: - *bit = input->keybit; - *max = KEY_MAX; + bmap = input->keybit; + limit = KEY_MAX; break; case EV_LED: - *bit = input->ledbit; - *max = LED_MAX; + bmap = input->ledbit; + limit = LED_MAX; break; } + + if (unlikely(c > limit || !bmap)) { + pr_warn_ratelimited("%s: Invalid code %d type %d\n", + input->name, c, type); + *bit = NULL; + return; + } + + usage->type = type; + usage->code = c; + *max = limit; + *bit = bmap; } /** @@ -1000,7 +1015,8 @@ static inline void hid_map_usage_clear(struct hid_input *hidinput, __u8 type, __u16 c) { hid_map_usage(hidinput, usage, bit, max, type, c); - clear_bit(c, *bit); + if (*bit) + clear_bit(usage->code, *bit); } /** -- cgit v1.2.3-59-g8ed1b From 3b5455636fe26ea21b4189d135a424a6da016418 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 2 Sep 2020 12:32:45 -0400 Subject: libata: implement ATA_HORKAGE_MAX_TRIM_128M and apply to Sandisks All three generations of Sandisk SSDs lock up hard intermittently. Experiments showed that disabling NCQ lowered the failure rate significantly and the kernel has been disabling NCQ for some models of SD7's and 8's, which is obviously undesirable. Karthik worked with Sandisk to root cause the hard lockups to trim commands larger than 128M. This patch implements ATA_HORKAGE_MAX_TRIM_128M which limits max trim size to 128M and applies it to all three generations of Sandisk SSDs. Signed-off-by: Tejun Heo Cc: Karthik Shivaram Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/ata/libata-core.c | 5 ++--- drivers/ata/libata-scsi.c | 8 +++++++- include/linux/libata.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index b1cd4d97bc2a..1be73d29119a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3868,9 +3868,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* https://bugzilla.kernel.org/show_bug.cgi?id=15573 */ { "C300-CTFDDAC128MAG", "0001", ATA_HORKAGE_NONCQ, }, - /* Some Sandisk SSDs lock up hard with NCQ enabled. Reported on - SD7SN6S256G and SD8SN8U256G */ - { "SanDisk SD[78]SN*G", NULL, ATA_HORKAGE_NONCQ, }, + /* Sandisk SD7/8/9s lock up hard on large trims */ + { "SanDisk SD[789]*", NULL, ATA_HORKAGE_MAX_TRIM_128M, }, /* devices which puke on READ_NATIVE_MAX */ { "HDS724040KLSA80", "KFAOA20N", ATA_HORKAGE_BROKEN_HPA, }, diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index ec233208585b..c7b5049b42d1 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2080,6 +2080,7 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf) static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf) { + struct ata_device *dev = args->dev; u16 min_io_sectors; rbuf[1] = 0xb0; @@ -2105,7 +2106,12 @@ static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf) * with the unmap bit set. */ if (ata_id_has_trim(args->id)) { - put_unaligned_be64(65535 * ATA_MAX_TRIM_RNUM, &rbuf[36]); + u64 max_blocks = 65535 * ATA_MAX_TRIM_RNUM; + + if (dev->horkage & ATA_HORKAGE_MAX_TRIM_128M) + max_blocks = 128 << (20 - SECTOR_SHIFT); + + put_unaligned_be64(max_blocks, &rbuf[36]); put_unaligned_be32(1, &rbuf[28]); } diff --git a/include/linux/libata.h b/include/linux/libata.h index 77ccf040a128..5f550eb27f81 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -421,6 +421,7 @@ enum { ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ + ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3-59-g8ed1b From 7e24969022cbd61ddc586f14824fc205661bb124 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 17 Aug 2020 18:00:55 +0800 Subject: block: allow for_each_bvec to support zero len bvec Block layer usually doesn't support or allow zero-length bvec. Since commit 1bdc76aea115 ("iov_iter: use bvec iterator to implement iterate_bvec()"), iterate_bvec() switches to bvec iterator. However, Al mentioned that 'Zero-length segments are not disallowed' in iov_iter. Fixes for_each_bvec() so that it can move on after seeing one zero length bvec. Fixes: 1bdc76aea115 ("iov_iter: use bvec iterator to implement iterate_bvec()") Reported-by: syzbot Signed-off-by: Ming Lei Tested-by: Tetsuo Handa Cc: Al Viro Cc: Matthew Wilcox Cc: Link: https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg2262077.html Signed-off-by: Jens Axboe --- include/linux/bvec.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index ac0c7299d5b8..dd74503f7e5e 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -117,11 +117,18 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, return true; } +static inline void bvec_iter_skip_zero_bvec(struct bvec_iter *iter) +{ + iter->bi_bvec_done = 0; + iter->bi_idx++; +} + #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ - bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len)) + (bvl).bv_len ? (void)bvec_iter_advance((bio_vec), &(iter), \ + (bvl).bv_len) : bvec_iter_skip_zero_bvec(&(iter))) /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ -- cgit v1.2.3-59-g8ed1b From 4533d3aed857c558d6aabd00d0cb04100c5a2258 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 1 Sep 2020 10:33:25 +0200 Subject: memremap: rename MEMORY_DEVICE_DEVDAX to MEMORY_DEVICE_GENERIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is in preparation for the logic behind MEMORY_DEVICE_DEVDAX also being used by non DAX devices. No functional change intended. Signed-off-by: Roger Pau Monné Reviewed-by: Ira Weiny Acked-by: Andrew Morton Reviewed-by: Pankaj Gupta Link: https://lore.kernel.org/r/20200901083326.21264-3-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/dax/device.c | 2 +- include/linux/memremap.h | 9 ++++----- mm/memremap.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 4c0af2eb7e19..1e89513f3c59 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -429,7 +429,7 @@ int dev_dax_probe(struct device *dev) return -EBUSY; } - dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX; + dev_dax->pgmap.type = MEMORY_DEVICE_GENERIC; addr = devm_memremap_pages(dev, &dev_dax->pgmap); if (IS_ERR(addr)) return PTR_ERR(addr); diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 5f5b2df06e61..e5862746751b 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -46,11 +46,10 @@ struct vmem_altmap { * wakeup is used to coordinate physical address space management (ex: * fs truncate/hole punch) vs pinned pages (ex: device dma). * - * MEMORY_DEVICE_DEVDAX: + * MEMORY_DEVICE_GENERIC: * Host memory that has similar access semantics as System RAM i.e. DMA - * coherent and supports page pinning. In contrast to - * MEMORY_DEVICE_FS_DAX, this memory is access via a device-dax - * character device. + * coherent and supports page pinning. This is for example used by DAX devices + * that expose memory using a character device. * * MEMORY_DEVICE_PCI_P2PDMA: * Device memory residing in a PCI BAR intended for use with Peer-to-Peer @@ -60,7 +59,7 @@ enum memory_type { /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, MEMORY_DEVICE_FS_DAX, - MEMORY_DEVICE_DEVDAX, + MEMORY_DEVICE_GENERIC, MEMORY_DEVICE_PCI_P2PDMA, }; diff --git a/mm/memremap.c b/mm/memremap.c index 03e38b7a38f1..006dace60b1a 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -216,7 +216,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) return ERR_PTR(-EINVAL); } break; - case MEMORY_DEVICE_DEVDAX: + case MEMORY_DEVICE_GENERIC: need_devmap_managed = false; break; case MEMORY_DEVICE_PCI_P2PDMA: -- cgit v1.2.3-59-g8ed1b From 4facb95b7adaf77e2da73aafb9ba60996fe42a12 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Sep 2020 01:50:54 +0200 Subject: x86/entry: Unbreak 32bit fast syscall Andy reported that the syscall treacing for 32bit fast syscall fails: # ./tools/testing/selftests/x86/ptrace_syscall_32 ... [RUN] SYSEMU [FAIL] Initial args are wrong (nr=224, args=10 11 12 13 14 4289172732) ... [RUN] SYSCALL [FAIL] Initial args are wrong (nr=29, args=0 0 0 0 0 4289172732) The eason is that the conversion to generic entry code moved the retrieval of the sixth argument (EBP) after the point where the syscall entry work runs, i.e. ptrace, seccomp, audit... Unbreak it by providing a split up version of syscall_enter_from_user_mode(). - syscall_enter_from_user_mode_prepare() establishes state and enables interrupts - syscall_enter_from_user_mode_work() runs the entry work Replace the call to syscall_enter_from_user_mode() in the 32bit fast syscall C-entry with the split functions and stick the EBP retrieval between them. Fixes: 27d6b4d14f5c ("x86/entry: Use generic syscall entry function") Reported-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87k0xdjbtt.fsf@nanos.tec.linutronix.de --- arch/x86/entry/common.c | 29 +++++++++++++++++-------- include/linux/entry-common.h | 51 ++++++++++++++++++++++++++++++++++++-------- kernel/entry/common.c | 35 ++++++++++++++++++++++++------ 3 files changed, 91 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 48512c7944e7..2f84c7ca74ea 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs) { - unsigned int nr = (unsigned int)regs->orig_ax; - if (IS_ENABLED(CONFIG_IA32_EMULATION)) current_thread_info()->status |= TS_COMPAT; - /* - * Subtlety here: if ptrace pokes something larger than 2^32-1 into - * orig_ax, the unsigned int return value truncates it. This may - * or may not be necessary, but it matches the old asm behavior. - */ - return (unsigned int)syscall_enter_from_user_mode(regs, nr); + + return (unsigned int)regs->orig_ax; } /* @@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { unsigned int nr = syscall_32_enter(regs); + /* + * Subtlety here: if ptrace pokes something larger than 2^32-1 into + * orig_ax, the unsigned int return value truncates it. This may + * or may not be necessary, but it matches the old asm behavior. + */ + nr = (unsigned int)syscall_enter_from_user_mode(regs, nr); + do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); } static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { - unsigned int nr = syscall_32_enter(regs); + unsigned int nr = syscall_32_enter(regs); int res; + /* + * This cannot use syscall_enter_from_user_mode() as it has to + * fetch EBP before invoking any of the syscall entry work + * functions. + */ + syscall_enter_from_user_mode_prepare(regs); + instrumentation_begin(); /* Fetch EBP from where the vDSO stashed it. */ if (IS_ENABLED(CONFIG_X86_64)) { @@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) return false; } + /* The case truncates any ptrace induced syscall nr > 2^32 -1 */ + nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr); + /* Now this is just like a normal syscall. */ do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index efebbffcd5cc..159c7476b11b 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -110,15 +110,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs #endif /** - * syscall_enter_from_user_mode - Check and handle work before invoking - * a syscall + * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts * @regs: Pointer to currents pt_regs - * @syscall: The syscall number * * Invoked from architecture specific syscall entry code with interrupts * disabled. The calling code has to be non-instrumentable. When the - * function returns all state is correct and the subsequent functions can be - * instrumented. + * function returns all state is correct, interrupts are enabled and the + * subsequent functions can be instrumented. + * + * This handles lockdep, RCU (context tracking) and tracing state. + * + * This is invoked when there is extra architecture specific functionality + * to be done between establishing state and handling user mode entry work. + */ +void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); + +/** + * syscall_enter_from_user_mode_work - Check and handle work before invoking + * a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * enabled after invoking syscall_enter_from_user_mode_prepare() and extra + * architecture specific work. * * Returns: The original or a modified syscall number * @@ -127,12 +142,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs * syscall_set_return_value() first. If neither of those are called and -1 * is returned, then the syscall will fail with ENOSYS. * - * The following functionality is handled here: + * It handles the following work items: * - * 1) Establish state (lockdep, RCU (context tracking), tracing) - * 2) TIF flag dependent invocations of arch_syscall_enter_tracehook(), + * 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(), * __secure_computing(), trace_sys_enter() - * 3) Invocation of audit_syscall_entry() + * 2) Invocation of audit_syscall_entry() + */ +long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); + +/** + * syscall_enter_from_user_mode - Establish state and check and handle work + * before invoking a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct, interrupts are enabled and the + * subsequent functions can be instrumented. + * + * This is combination of syscall_enter_from_user_mode_prepare() and + * syscall_enter_from_user_mode_work(). + * + * Returns: The original or a modified syscall number. See + * syscall_enter_from_user_mode_work() for further explanation. */ long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall); diff --git a/kernel/entry/common.c b/kernel/entry/common.c index fcae019158ca..18683598edbc 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -69,22 +69,45 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, return ret ? : syscall_get_nr(current, regs); } -noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +static __always_inline long +__syscall_enter_from_user_work(struct pt_regs *regs, long syscall) { unsigned long ti_work; - enter_from_user_mode(regs); - instrumentation_begin(); - - local_irq_enable(); ti_work = READ_ONCE(current_thread_info()->flags); if (ti_work & SYSCALL_ENTER_WORK) syscall = syscall_trace_enter(regs, syscall, ti_work); - instrumentation_end(); return syscall; } +long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall) +{ + return __syscall_enter_from_user_work(regs, syscall); +} + +noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +{ + long ret; + + enter_from_user_mode(regs); + + instrumentation_begin(); + local_irq_enable(); + ret = __syscall_enter_from_user_work(regs, syscall); + instrumentation_end(); + + return ret; +} + +noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs) +{ + enter_from_user_mode(regs); + instrumentation_begin(); + local_irq_enable(); + instrumentation_end(); +} + /** * exit_to_user_mode - Fixup state when exiting to user mode * -- cgit v1.2.3-59-g8ed1b From a2d375eda771a6a4866f3717a8ed81b63acb1dbd Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Mon, 31 Aug 2020 12:22:09 -0600 Subject: dyndbg: refine export, rename to dynamic_debug_exec_queries() commit 4c0d77828d4f ("dyndbg: export ddebug_exec_queries") had a few problems: - broken non DYNAMIC_DEBUG_CORE configs, sparse warning - the exported function modifies query string, breaks on RO strings. - func name follows internal convention, shouldn't be exposed as is. 1st is fixed in header with ifdefd function prototype or stub defn. Also remove an obsolete HAVE-symbol ifdef-comment, and add others. Fix others by wrapping existing internal function with a new one, named in accordance with module-prefix naming convention, before export hits v5.9.0. In new function, copy query string to a local buffer, so users can pass hard-coded/RO queries, and internal function can be used unchanged. Fixes: 4c0d77828d4f ("dyndbg: export ddebug_exec_queries") Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20200831182210.850852-3-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/dynamic_debug.h | 20 ++++++++++++++++---- lib/dynamic_debug.c | 27 +++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index aa9ff9e1c0b3..8aa0c7c2608c 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -49,6 +49,10 @@ struct _ddebug { #if defined(CONFIG_DYNAMIC_DEBUG_CORE) + +/* exported for module authors to exercise >control */ +int dynamic_debug_exec_queries(const char *query, const char *modname); + int ddebug_add_module(struct _ddebug *tab, unsigned int n, const char *modname); extern int ddebug_remove_module(const char *mod_name); @@ -105,7 +109,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, static_branch_unlikely(&descriptor.key.dd_key_false) #endif -#else /* !HAVE_JUMP_LABEL */ +#else /* !CONFIG_JUMP_LABEL */ #define _DPRINTK_KEY_INIT @@ -117,7 +121,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) #endif -#endif +#endif /* CONFIG_JUMP_LABEL */ #define __dynamic_func_call(id, fmt, func, ...) do { \ DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \ @@ -172,10 +176,11 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, KERN_DEBUG, prefix_str, prefix_type, \ rowsize, groupsize, buf, len, ascii) -#else +#else /* !CONFIG_DYNAMIC_DEBUG_CORE */ #include #include +#include static inline int ddebug_add_module(struct _ddebug *tab, unsigned int n, const char *modname) @@ -210,6 +215,13 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, \ rowsize, groupsize, buf, len, ascii); \ } while (0) -#endif + +static inline int dynamic_debug_exec_queries(const char *query, const char *modname) +{ + pr_warn("kernel not built with CONFIG_DYNAMIC_DEBUG_CORE\n"); + return 0; +} + +#endif /* !CONFIG_DYNAMIC_DEBUG_CORE */ #endif diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 01b7d0210412..08e4b057514c 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -525,7 +525,7 @@ static int ddebug_exec_query(char *query_string, const char *modname) last error or number of matching callsites. Module name is either in param (for boot arg) or perhaps in query string. */ -int ddebug_exec_queries(char *query, const char *modname) +static int ddebug_exec_queries(char *query, const char *modname) { char *split; int i, errs = 0, exitcode = 0, rc, nfound = 0; @@ -557,7 +557,30 @@ int ddebug_exec_queries(char *query, const char *modname) return exitcode; return nfound; } -EXPORT_SYMBOL_GPL(ddebug_exec_queries); + +/** + * dynamic_debug_exec_queries - select and change dynamic-debug prints + * @query: query-string described in admin-guide/dynamic-debug-howto + * @modname: string containing module name, usually &module.mod_name + * + * This uses the >/proc/dynamic_debug/control reader, allowing module + * authors to modify their dynamic-debug callsites. The modname is + * canonically struct module.mod_name, but can also be null or a + * module-wildcard, for example: "drm*". + */ +int dynamic_debug_exec_queries(const char *query, const char *modname) +{ + int rc; + char *qry = kstrndup(query, PAGE_SIZE, GFP_KERNEL); + + if (!query) + return -ENOMEM; + + rc = ddebug_exec_queries(qry, modname); + kfree(qry); + return rc; +} +EXPORT_SYMBOL_GPL(dynamic_debug_exec_queries); #define PREFIX_SIZE 64 -- cgit v1.2.3-59-g8ed1b From 1a0cf26323c80e2f1c58fc04f15686de61bfab0c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 21 Aug 2020 19:49:56 -0400 Subject: mm/ksm: Remove reuse_ksm_page() Remove the function as the last reference has gone away with the do_wp_page() changes. Signed-off-by: Peter Xu Signed-off-by: Linus Torvalds --- include/linux/ksm.h | 7 ------- mm/ksm.c | 25 ------------------------- 2 files changed, 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index e48b1e453ff5..161e8164abcf 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -53,8 +53,6 @@ struct page *ksm_might_need_to_copy(struct page *page, void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); void ksm_migrate_page(struct page *newpage, struct page *oldpage); -bool reuse_ksm_page(struct page *page, - struct vm_area_struct *vma, unsigned long address); #else /* !CONFIG_KSM */ @@ -88,11 +86,6 @@ static inline void rmap_walk_ksm(struct page *page, static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage) { } -static inline bool reuse_ksm_page(struct page *page, - struct vm_area_struct *vma, unsigned long address) -{ - return false; -} #endif /* CONFIG_MMU */ #endif /* !CONFIG_KSM */ diff --git a/mm/ksm.c b/mm/ksm.c index 4102034cd55a..12958287fac3 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2660,31 +2660,6 @@ again: goto again; } -bool reuse_ksm_page(struct page *page, - struct vm_area_struct *vma, - unsigned long address) -{ -#ifdef CONFIG_DEBUG_VM - if (WARN_ON(is_zero_pfn(page_to_pfn(page))) || - WARN_ON(!page_mapped(page)) || - WARN_ON(!PageLocked(page))) { - dump_page(page, "reuse_ksm_page"); - return false; - } -#endif - - if (PageSwapCache(page) || !page_stable_node(page)) - return false; - /* Prohibit parallel get_ksm_page() */ - if (!page_ref_freeze(page, 1)) - return false; - - page_move_anon_rmap(page, vma); - page->index = linear_page_index(vma, address); - page_ref_unfreeze(page, 1); - - return true; -} #ifdef CONFIG_MIGRATION void ksm_migrate_page(struct page *newpage, struct page *oldpage) { -- cgit v1.2.3-59-g8ed1b From 798a6b87ecd72828a6c6b5469aaa2032a57e92b7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 21 Aug 2020 19:49:58 -0400 Subject: mm: Add PGREUSE counter This accounts for wp_page_reuse() case, where we reused a page for COW. Signed-off-by: Peter Xu Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 1 + mm/memory.c | 1 + mm/vmstat.c | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 24fc7c3ae7d6..58d3f91baad1 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -30,6 +30,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGFAULT, PGMAJFAULT, PGLAZYFREED, PGREFILL, + PGREUSE, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, PGSCAN_KSWAPD, diff --git a/mm/memory.c b/mm/memory.c index 56ae945c6845..20d93001ef93 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2619,6 +2619,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf) if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) update_mmu_cache(vma, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); + count_vm_event(PGREUSE); } /* diff --git a/mm/vmstat.c b/mm/vmstat.c index 3fb23a21f6dd..907a5045bfa3 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1198,6 +1198,7 @@ const char * const vmstat_text[] = { "pglazyfreed", "pgrefill", + "pgreuse", "pgsteal_kswapd", "pgsteal_direct", "pgscan_kswapd", -- cgit v1.2.3-59-g8ed1b From 428fc0aff4e59399ec719ffcc1f7a5d29a4ee476 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 4 Sep 2020 16:36:19 -0700 Subject: include/linux/log2.h: add missing () around n in roundup_pow_of_two() Otherwise gcc generates warnings if the expression is complicated. Fixes: 312a0c170945 ("[PATCH] LOG2: Alter roundup_pow_of_two() so that it can use a ilog2() on a constant") Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/0-v1-8a2697e3c003+41165-log_brackets_jgg@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/log2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/log2.h b/include/linux/log2.h index 83a4a3ca3e8a..c619ec6eff4a 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -173,7 +173,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define roundup_pow_of_two(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n == 1) ? 1 : \ + ((n) == 1) ? 1 : \ (1UL << (ilog2((n) - 1) + 1)) \ ) : \ __roundup_pow_of_two(n) \ -- cgit v1.2.3-59-g8ed1b From 1c30474826682bc970c3200700d8bcfa2b771278 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 6 Sep 2020 20:42:52 -0700 Subject: PM: : fix @em_pd kernel-doc warning Fix kernel-doc warning in : ../include/linux/device.h:613: warning: Function parameter or member 'em_pd' not described in 'device' Fixes: 1bc138c62295 ("PM / EM: add support for other devices than CPUs in Energy Model") Signed-off-by: Randy Dunlap Cc: Rafael J. Wysocki Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/d97f40ad-3033-703a-c3cb-2843ce0f6371@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ca18da4768e3..9e6ea8931a52 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -454,6 +454,7 @@ struct dev_links_info { * @pm_domain: Provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. + * @em_pd: device's energy model performance domain * @pins: For device pin management. * See Documentation/driver-api/pinctl.rst for details. * @msi_list: Hosts MSI descriptors -- cgit v1.2.3-59-g8ed1b From 0a355aeb24081e4538d4d424cd189f16c0bbd983 Mon Sep 17 00:00:00 2001 From: Evan Nimmo Date: Wed, 9 Sep 2020 08:32:47 +1200 Subject: i2c: algo: pca: Reapply i2c bus settings after reset If something goes wrong (such as the SCL being stuck low) then we need to reset the PCA chip. The issue with this is that on reset we lose all config settings and the chip ends up in a disabled state which results in a lock up/high CPU usage. We need to re-apply any configuration that had previously been set and re-enable the chip. Signed-off-by: Evan Nimmo Reviewed-by: Chris Packham Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/algos/i2c-algo-pca.c | 35 +++++++++++++++++++++++------------ include/linux/i2c-algo-pca.h | 15 +++++++++++++++ 2 files changed, 38 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index 710fbef9a9c2..384af88e58ad 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -41,8 +41,22 @@ static void pca_reset(struct i2c_algo_pca_data *adap) pca_outw(adap, I2C_PCA_INDPTR, I2C_PCA_IPRESET); pca_outw(adap, I2C_PCA_IND, 0xA5); pca_outw(adap, I2C_PCA_IND, 0x5A); + + /* + * After a reset we need to re-apply any configuration + * (calculated in pca_init) to get the bus in a working state. + */ + pca_outw(adap, I2C_PCA_INDPTR, I2C_PCA_IMODE); + pca_outw(adap, I2C_PCA_IND, adap->bus_settings.mode); + pca_outw(adap, I2C_PCA_INDPTR, I2C_PCA_ISCLL); + pca_outw(adap, I2C_PCA_IND, adap->bus_settings.tlow); + pca_outw(adap, I2C_PCA_INDPTR, I2C_PCA_ISCLH); + pca_outw(adap, I2C_PCA_IND, adap->bus_settings.thi); + + pca_set_con(adap, I2C_PCA_CON_ENSIO); } else { adap->reset_chip(adap->data); + pca_set_con(adap, I2C_PCA_CON_ENSIO | adap->bus_settings.clock_freq); } } @@ -423,13 +437,14 @@ static int pca_init(struct i2c_adapter *adap) " Use the nominal frequency.\n", adap->name); } - pca_reset(pca_data); - clock = pca_clock(pca_data); printk(KERN_INFO "%s: Clock frequency is %dkHz\n", adap->name, freqs[clock]); - pca_set_con(pca_data, I2C_PCA_CON_ENSIO | clock); + /* Store settings as these will be needed when the PCA chip is reset */ + pca_data->bus_settings.clock_freq = clock; + + pca_reset(pca_data); } else { int clock; int mode; @@ -496,19 +511,15 @@ static int pca_init(struct i2c_adapter *adap) thi = tlow * min_thi / min_tlow; } + /* Store settings as these will be needed when the PCA chip is reset */ + pca_data->bus_settings.mode = mode; + pca_data->bus_settings.tlow = tlow; + pca_data->bus_settings.thi = thi; + pca_reset(pca_data); printk(KERN_INFO "%s: Clock frequency is %dHz\n", adap->name, clock * 100); - - pca_outw(pca_data, I2C_PCA_INDPTR, I2C_PCA_IMODE); - pca_outw(pca_data, I2C_PCA_IND, mode); - pca_outw(pca_data, I2C_PCA_INDPTR, I2C_PCA_ISCLL); - pca_outw(pca_data, I2C_PCA_IND, tlow); - pca_outw(pca_data, I2C_PCA_INDPTR, I2C_PCA_ISCLH); - pca_outw(pca_data, I2C_PCA_IND, thi); - - pca_set_con(pca_data, I2C_PCA_CON_ENSIO); } udelay(500); /* 500 us for oscillator to stabilise */ diff --git a/include/linux/i2c-algo-pca.h b/include/linux/i2c-algo-pca.h index d03071732db4..7c522fdd9ea7 100644 --- a/include/linux/i2c-algo-pca.h +++ b/include/linux/i2c-algo-pca.h @@ -53,6 +53,20 @@ #define I2C_PCA_CON_SI 0x08 /* Serial Interrupt */ #define I2C_PCA_CON_CR 0x07 /* Clock Rate (MASK) */ +/** + * struct pca_i2c_bus_settings - The configured PCA i2c bus settings + * @mode: Configured i2c bus mode + * @tlow: Configured SCL LOW period + * @thi: Configured SCL HIGH period + * @clock_freq: The configured clock frequency + */ +struct pca_i2c_bus_settings { + int mode; + int tlow; + int thi; + int clock_freq; +}; + struct i2c_algo_pca_data { void *data; /* private low level data */ void (*write_byte) (void *data, int reg, int val); @@ -64,6 +78,7 @@ struct i2c_algo_pca_data { * For PCA9665, use the frequency you want here. */ unsigned int i2c_clock; unsigned int chip; + struct pca_i2c_bus_settings bus_settings; }; int i2c_pca_add_bus(struct i2c_adapter *); -- cgit v1.2.3-59-g8ed1b From baaabecfc80fad255f866563b53b8c7a3eec176e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 9 Sep 2020 15:53:54 -0700 Subject: test_firmware: Test platform fw loading on non-EFI systems On non-EFI systems, it wasn't possible to test the platform firmware loader because it will have never set "checked_fw" during __init. Instead, allow the test code to override this check. Additionally split the declarations into a private symbol namespace so there is greater enforcement of the symbol visibility. Fixes: 548193cba2a7 ("test_firmware: add support for firmware_request_platform") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20200909225354.3118328-1-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/embedded-firmware.c | 10 +++++----- include/linux/efi_embedded_fw.h | 6 ++---- lib/test_firmware.c | 9 +++++++++ 3 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/embedded-firmware.c b/drivers/firmware/efi/embedded-firmware.c index e97a9c9d010c..21ae0c48232a 100644 --- a/drivers/firmware/efi/embedded-firmware.c +++ b/drivers/firmware/efi/embedded-firmware.c @@ -16,9 +16,9 @@ /* Exported for use by lib/test_firmware.c only */ LIST_HEAD(efi_embedded_fw_list); -EXPORT_SYMBOL_GPL(efi_embedded_fw_list); - -static bool checked_for_fw; +EXPORT_SYMBOL_NS_GPL(efi_embedded_fw_list, TEST_FIRMWARE); +bool efi_embedded_fw_checked; +EXPORT_SYMBOL_NS_GPL(efi_embedded_fw_checked, TEST_FIRMWARE); static const struct dmi_system_id * const embedded_fw_table[] = { #ifdef CONFIG_TOUCHSCREEN_DMI @@ -116,14 +116,14 @@ void __init efi_check_for_embedded_firmwares(void) } } - checked_for_fw = true; + efi_embedded_fw_checked = true; } int efi_get_embedded_fw(const char *name, const u8 **data, size_t *size) { struct efi_embedded_fw *iter, *fw = NULL; - if (!checked_for_fw) { + if (!efi_embedded_fw_checked) { pr_warn("Warning %s called while we did not check for embedded fw\n", __func__); return -ENOENT; diff --git a/include/linux/efi_embedded_fw.h b/include/linux/efi_embedded_fw.h index 57eac5241303..a97a12bb2c9e 100644 --- a/include/linux/efi_embedded_fw.h +++ b/include/linux/efi_embedded_fw.h @@ -8,8 +8,8 @@ #define EFI_EMBEDDED_FW_PREFIX_LEN 8 /* - * This struct and efi_embedded_fw_list are private to the efi-embedded fw - * implementation they are in this header for use by lib/test_firmware.c only! + * This struct is private to the efi-embedded fw implementation. + * They are in this header for use by lib/test_firmware.c only! */ struct efi_embedded_fw { struct list_head list; @@ -18,8 +18,6 @@ struct efi_embedded_fw { size_t length; }; -extern struct list_head efi_embedded_fw_list; - /** * struct efi_embedded_fw_desc - This struct is used by the EFI embedded-fw * code to search for embedded firmwares. diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 9fee2b93a8d1..06c955057756 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -26,6 +26,8 @@ #include #include +MODULE_IMPORT_NS(TEST_FIRMWARE); + #define TEST_FIRMWARE_NAME "test-firmware.bin" #define TEST_FIRMWARE_NUM_REQS 4 #define TEST_FIRMWARE_BUF_SIZE SZ_1K @@ -489,6 +491,9 @@ out: static DEVICE_ATTR_WO(trigger_request); #ifdef CONFIG_EFI_EMBEDDED_FIRMWARE +extern struct list_head efi_embedded_fw_list; +extern bool efi_embedded_fw_checked; + static ssize_t trigger_request_platform_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -501,6 +506,7 @@ static ssize_t trigger_request_platform_store(struct device *dev, }; struct efi_embedded_fw efi_embedded_fw; const struct firmware *firmware = NULL; + bool saved_efi_embedded_fw_checked; char *name; int rc; @@ -513,6 +519,8 @@ static ssize_t trigger_request_platform_store(struct device *dev, efi_embedded_fw.data = (void *)test_data; efi_embedded_fw.length = sizeof(test_data); list_add(&efi_embedded_fw.list, &efi_embedded_fw_list); + saved_efi_embedded_fw_checked = efi_embedded_fw_checked; + efi_embedded_fw_checked = true; pr_info("loading '%s'\n", name); rc = firmware_request_platform(&firmware, name, dev); @@ -530,6 +538,7 @@ static ssize_t trigger_request_platform_store(struct device *dev, rc = count; out: + efi_embedded_fw_checked = saved_efi_embedded_fw_checked; release_firmware(firmware); list_del(&efi_embedded_fw.list); kfree(name); -- cgit v1.2.3-59-g8ed1b From 95035eac763294eb4543aea9afd48d2f7c8caa5c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 6 Sep 2020 20:42:52 -0700 Subject: PM: : fix @em_pd kernel-doc warning Fix kernel-doc warning in : ../include/linux/device.h:613: warning: Function parameter or member 'em_pd' not described in 'device' Fixes: 1bc138c62295 ("PM / EM: add support for other devices than CPUs in Energy Model") Signed-off-by: Randy Dunlap Reviewed-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ca18da4768e3..9e6ea8931a52 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -454,6 +454,7 @@ struct dev_links_info { * @pm_domain: Provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. + * @em_pd: device's energy model performance domain * @pins: For device pin management. * See Documentation/driver-api/pinctl.rst for details. * @msi_list: Hosts MSI descriptors -- cgit v1.2.3-59-g8ed1b From cc88b78c0870ebcab2123ba9e73689d97fbf3b14 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Thu, 10 Sep 2020 15:57:46 +0530 Subject: powercap: make documentation reflect code Fix up the documentation of the struct powercap_control_type members to match the code. Also fixup stray whitespace. Signed-off-by: Amit Kucheria [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/powercap.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/powercap.h b/include/linux/powercap.h index 4537f57f9e42..3d557bbcd2c7 100644 --- a/include/linux/powercap.h +++ b/include/linux/powercap.h @@ -44,19 +44,18 @@ struct powercap_control_type_ops { }; /** - * struct powercap_control_type- Defines a powercap control_type - * @name: name of control_type + * struct powercap_control_type - Defines a powercap control_type * @dev: device for this control_type * @idr: idr to have unique id for its child - * @root_node: Root holding power zones for this control_type + * @nr_zones: counter for number of zones of this type * @ops: Pointer to callback struct - * @node_lock: mutex for control type + * @lock: mutex for control type * @allocated: This is possible that client owns the memory * used by this structure. In this case * this flag is set to false by framework to * prevent deallocation during release process. * Otherwise this flag is set to true. - * @ctrl_inst: link to the control_type list + * @node: linked-list node * * Defines powercap control_type. This acts as a container for power * zones, which use same method to control power. E.g. RAPL, RAPL-PCI etc. @@ -129,7 +128,7 @@ struct powercap_zone_ops { * this flag is set to false by framework to * prevent deallocation during release process. * Otherwise this flag is set to true. - * @constraint_ptr: List of constraints for this zone. + * @constraints: List of constraints for this zone. * * This defines a power zone instance. The fields of this structure are * private, and should not be used by client drivers. -- cgit v1.2.3-59-g8ed1b From 639bf4415cadff4c18e13aa5cb0dba2d443e3aa7 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Thu, 17 Sep 2020 12:02:21 +0300 Subject: net/mlx5: Refactor query port speed functions The functions mlx5_query_port_link_width_oper and mlx5_query_port_ib_proto_oper are always called together, so combine them to a new function called mlx5_query_port_oper to avoid duplication. And while the mlx5i_get_port_settings is the same as mlx5_query_port_oper therefore let's remove it. According to the IB spec link_width_oper and ib_proto_oper should be u16 and not as written u8, so perform casting as a preparation to cross-RDMA patch which will fix that type for all drivers in the RDMA subsystem. Fixes: ada68c31ba9c ("net/mlx5: Introduce a new header file for physical port functions") Signed-off-by: Aharon Landau Reviewed-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 27 +++++++++++----------- .../ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 23 ++++-------------- drivers/net/ethernet/mellanox/mlx5/core/port.c | 23 ++++-------------- include/linux/mlx5/port.h | 7 +++--- 4 files changed, 25 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d60d63221b14..631ca360057f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -326,8 +326,8 @@ out: spin_unlock(&port->mp.mpi_lock); } -static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, - u8 *active_width) +static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, + u16 *active_speed, u8 *active_width) { switch (eth_proto_oper) { case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII): @@ -384,7 +384,7 @@ static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } -static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, +static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, u8 *active_width) { switch (eth_proto_oper) { @@ -436,7 +436,7 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } -static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, +static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed, u8 *active_width, bool ext) { return ext ? @@ -457,6 +457,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, bool put_mdev = true; u16 qkey_viol_cntr; u32 eth_prot_oper; + u16 active_speed; u8 mdev_port_num; bool ext; int err; @@ -490,9 +491,12 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_QDR; - translate_eth_proto_oper(eth_prot_oper, &props->active_speed, + translate_eth_proto_oper(eth_prot_oper, &active_speed, &props->active_width, ext); + WARN_ON_ONCE(active_speed & ~0xFF); + props->active_speed = (u8)active_speed; + props->port_cap_flags |= IB_PORT_CM_SUP; props->ip_gids = true; @@ -1183,8 +1187,8 @@ enum mlx5_ib_width { MLX5_IB_WIDTH_12X = 1 << 4 }; -static void translate_active_width(struct ib_device *ibdev, u8 active_width, - u8 *ib_width) +static void translate_active_width(struct ib_device *ibdev, u16 active_width, + u8 *ib_width) { struct mlx5_ib_dev *dev = to_mdev(ibdev); @@ -1277,7 +1281,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, u16 max_mtu; u16 oper_mtu; int err; - u8 ib_link_width_oper; + u16 ib_link_width_oper; u8 vl_hw_cap; rep = kzalloc(sizeof(*rep), GFP_KERNEL); @@ -1310,16 +1314,13 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) props->port_cap_flags2 = rep->cap_mask2; - err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); + err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, + (u16 *)&props->active_speed, port); if (err) goto out; translate_active_width(ibdev, ib_link_width_oper, &props->active_width); - err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port); - if (err) - goto out; - mlx5_query_port_max_mtu(mdev, &max_mtu, port); props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 1eef66ee849e..17f5be801d2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -174,24 +174,6 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) } } -static int mlx5i_get_port_settings(struct net_device *netdev, - u16 *ib_link_width_oper, u16 *ib_proto_oper) -{ - struct mlx5e_priv *priv = mlx5i_epriv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; - u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; - int ret; - - ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1); - if (ret) - return ret; - - *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); - *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); - - return 0; -} - static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) { int rate, width; @@ -209,11 +191,14 @@ static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) static int mlx5i_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *link_ksettings) { + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; u16 ib_link_width_oper; u16 ib_proto_oper; int speed, ret; - ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper); + ret = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, &ib_proto_oper, + 1); if (ret) return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index e4186e84b3ff..4bb219565c58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -154,24 +154,8 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) sizeof(out), MLX5_REG_MLCR, 0, 1); } -int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, - u8 *link_width_oper, u8 local_port) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port); - if (err) - return err; - - *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); - -int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, - u8 *proto_oper, u8 local_port) +int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper, + u16 *proto_oper, u8 local_port) { u32 out[MLX5_ST_SZ_DW(ptys_reg)]; int err; @@ -181,11 +165,12 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, if (err) return err; + *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); return 0; } -EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper); +EXPORT_SYMBOL(mlx5_query_ib_port_oper); /* This function should be used after setting a port register only */ void mlx5_toggle_port_link(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 2d45a6af52a4..4d33ae0c2d97 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -133,10 +133,9 @@ enum mlx5e_connector_type { int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); -int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, - u8 *link_width_oper, u8 local_port); -int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, - u8 *proto_oper, u8 local_port); + +int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper, + u16 *proto_oper, u8 local_port); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); -- cgit v1.2.3-59-g8ed1b From e27014bdb47eb435f78573685f4196c07329f1f7 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Thu, 17 Sep 2020 12:02:22 +0300 Subject: RDMA/mlx5: Delete duplicated mlx5_ptys_width enum Combine two same enums to avoid duplication. Signed-off-by: Aharon Landau Reviewed-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 20 ++++++-------------- .../net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 8 -------- include/linux/mlx5/port.h | 8 ++++++++ 3 files changed, 14 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 631ca360057f..2dbacf0ebda3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1179,32 +1179,24 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, return 0; } -enum mlx5_ib_width { - MLX5_IB_WIDTH_1X = 1 << 0, - MLX5_IB_WIDTH_2X = 1 << 1, - MLX5_IB_WIDTH_4X = 1 << 2, - MLX5_IB_WIDTH_8X = 1 << 3, - MLX5_IB_WIDTH_12X = 1 << 4 -}; - static void translate_active_width(struct ib_device *ibdev, u16 active_width, u8 *ib_width) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - if (active_width & MLX5_IB_WIDTH_1X) + if (active_width & MLX5_PTYS_WIDTH_1X) *ib_width = IB_WIDTH_1X; - else if (active_width & MLX5_IB_WIDTH_2X) + else if (active_width & MLX5_PTYS_WIDTH_2X) *ib_width = IB_WIDTH_2X; - else if (active_width & MLX5_IB_WIDTH_4X) + else if (active_width & MLX5_PTYS_WIDTH_4X) *ib_width = IB_WIDTH_4X; - else if (active_width & MLX5_IB_WIDTH_8X) + else if (active_width & MLX5_PTYS_WIDTH_8X) *ib_width = IB_WIDTH_8X; - else if (active_width & MLX5_IB_WIDTH_12X) + else if (active_width & MLX5_PTYS_WIDTH_12X) *ib_width = IB_WIDTH_12X; else { mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n", - (int)active_width); + active_width); *ib_width = IB_WIDTH_4X; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 17f5be801d2f..cac8f085b16d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -130,14 +130,6 @@ static int mlx5i_flash_device(struct net_device *netdev, return mlx5e_ethtool_flash_device(priv, flash); } -enum mlx5_ptys_width { - MLX5_PTYS_WIDTH_1X = 1 << 0, - MLX5_PTYS_WIDTH_2X = 1 << 1, - MLX5_PTYS_WIDTH_4X = 1 << 2, - MLX5_PTYS_WIDTH_8X = 1 << 3, - MLX5_PTYS_WIDTH_12X = 1 << 4, -}; - static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width) { switch (width) { diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 4d33ae0c2d97..23edd2db4803 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -125,6 +125,14 @@ enum mlx5e_connector_type { MLX5E_CONNECTOR_TYPE_NUMBER, }; +enum mlx5_ptys_width { + MLX5_PTYS_WIDTH_1X = 1 << 0, + MLX5_PTYS_WIDTH_2X = 1 << 1, + MLX5_PTYS_WIDTH_4X = 1 << 2, + MLX5_PTYS_WIDTH_8X = 1 << 3, + MLX5_PTYS_WIDTH_12X = 1 << 4, +}; + #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? MLX5_GET(reg, out, ext_##field) : \ -- cgit v1.2.3-59-g8ed1b