From 218af599fa635b107cfe10acf3249c4dfe5e4123 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 May 2017 04:24:36 -0700 Subject: tcp: internal implementation for pacing BBR congestion control depends on pacing, and pacing is currently handled by sch_fq packet scheduler for performance reasons, and also because implemening pacing with FQ was convenient to truly avoid bursts. However there are many cases where this packet scheduler constraint is not practical. - Many linux hosts are not focusing on handling thousands of TCP flows in the most efficient way. - Some routers use fq_codel or other AQM, but still would like to use BBR for the few TCP flows they initiate/terminate. This patch implements an automatic fallback to internal pacing. Pacing is requested either by BBR or use of SO_MAX_PACING_RATE option. If sch_fq happens to be in the egress path, pacing is delegated to the qdisc, otherwise pacing is done by TCP itself. One advantage of pacing from TCP stack is to get more precise rtt estimations, and less work done from TX completion, since TCP Small queue limits are not generally hit. Setups with single TX queue but many cpus might even benefit from this. Note that unlike sch_fq, we do not take into account header sizes. Taking care of these headers would add additional complexity for no practical differences in behavior. Some performance numbers using 800 TCP_STREAM flows rate limited to ~48 Mbit per second on 40Gbit NIC. If MQ+pfifo_fast is used on the NIC : $ sar -n DEV 1 5 | grep eth 14:48:44 eth0 725743.00 2932134.00 46776.76 4335184.68 0.00 0.00 1.00 14:48:45 eth0 725349.00 2932112.00 46751.86 4335158.90 0.00 0.00 0.00 14:48:46 eth0 725101.00 2931153.00 46735.07 4333748.63 0.00 0.00 0.00 14:48:47 eth0 725099.00 2931161.00 46735.11 4333760.44 0.00 0.00 1.00 14:48:48 eth0 725160.00 2931731.00 46738.88 4334606.07 0.00 0.00 0.00 Average: eth0 725290.40 2931658.20 46747.54 4334491.74 0.00 0.00 0.40 $ vmstat 1 5 procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu----- r b swpd free buff cache si so bi bo in cs us sy id wa st 4 0 0 259825920 45644 2708324 0 0 21 2 247 98 0 0 100 0 0 4 0 0 259823744 45644 2708356 0 0 0 0 2400825 159843 0 19 81 0 0 0 0 0 259824208 45644 2708072 0 0 0 0 2407351 159929 0 19 81 0 0 1 0 0 259824592 45644 2708128 0 0 0 0 2405183 160386 0 19 80 0 0 1 0 0 259824272 45644 2707868 0 0 0 32 2396361 158037 0 19 81 0 0 Now use MQ+FQ : lpaa23:~# echo fq >/proc/sys/net/core/default_qdisc lpaa23:~# tc qdisc replace dev eth0 root mq $ sar -n DEV 1 5 | grep eth 14:49:57 eth0 678614.00 2727930.00 43739.13 4033279.14 0.00 0.00 0.00 14:49:58 eth0 677620.00 2723971.00 43674.69 4027429.62 0.00 0.00 1.00 14:49:59 eth0 676396.00 2719050.00 43596.83 4020125.02 0.00 0.00 0.00 14:50:00 eth0 675197.00 2714173.00 43518.62 4012938.90 0.00 0.00 1.00 14:50:01 eth0 676388.00 2719063.00 43595.47 4020171.64 0.00 0.00 0.00 Average: eth0 676843.00 2720837.40 43624.95 4022788.86 0.00 0.00 0.40 $ vmstat 1 5 procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu----- r b swpd free buff cache si so bi bo in cs us sy id wa st 2 0 0 259832240 46008 2710912 0 0 21 2 223 192 0 1 99 0 0 1 0 0 259832896 46008 2710744 0 0 0 0 1702206 198078 0 17 82 0 0 0 0 0 259830272 46008 2710596 0 0 0 0 1696340 197756 1 17 83 0 0 4 0 0 259829168 46024 2710584 0 0 16 0 1688472 197158 1 17 82 0 0 3 0 0 259830224 46024 2710408 0 0 0 0 1692450 197212 0 18 82 0 0 As expected, number of interrupts per second is very different. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Cc: Neal Cardwell Cc: Yuchung Cheng Cc: Van Jacobson Cc: Jerry Chu Signed-off-by: David S. Miller --- net/core/sock.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/core/sock.c') diff --git a/net/core/sock.c b/net/core/sock.c index e43e71d7856b..93d011e35b83 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1041,6 +1041,10 @@ set_rcvbuf: #endif case SO_MAX_PACING_RATE: + if (val != ~0U) + cmpxchg(&sk->sk_pacing_status, + SK_PACING_NONE, + SK_PACING_NEEDED); sk->sk_max_pacing_rate = val; sk->sk_pacing_rate = min(sk->sk_pacing_rate, sk->sk_max_pacing_rate); -- cgit v1.2.3-59-g8ed1b From 0604475119de5f80dc051a5db055c6a2a75bd542 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Jun 2017 13:29:12 -0700 Subject: tcp: add TCPMemoryPressuresChrono counter DRAM supply shortage and poor memory pressure tracking in TCP stack makes any change in SO_SNDBUF/SO_RCVBUF (or equivalent autotuning limits) and tcp_mem[] quite hazardous. TCPMemoryPressures SNMP counter is an indication of tcp_mem sysctl limits being hit, but only tracking number of transitions. If TCP stack behavior under stress was perfect : 1) It would maintain memory usage close to the limit. 2) Memory pressure state would be entered for short times. We certainly prefer 100 events lasting 10ms compared to one event lasting 200 seconds. This patch adds a new SNMP counter tracking cumulative duration of memory pressure events, given in ms units. $ cat /proc/sys/net/ipv4/tcp_mem 3088 4117 6176 $ grep TCP /proc/net/sockstat TCP: inuse 180 orphan 0 tw 2 alloc 234 mem 4140 $ nstat -n ; sleep 10 ; nstat |grep Pressure TcpExtTCPMemoryPressures 1700 TcpExtTCPMemoryPressuresChrono 5209 v2: Used EXPORT_SYMBOL_GPL() instead of EXPORT_SYMBOL() as David instructed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 22 ++-------------------- include/net/tcp.h | 3 ++- include/uapi/linux/snmp.h | 1 + net/core/sock.c | 20 ++++++++++++++++++++ net/decnet/af_decnet.c | 2 +- net/ipv4/proc.c | 1 + net/ipv4/tcp.c | 31 +++++++++++++++++++++++++------ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + net/sctp/socket.c | 2 +- 10 files changed, 55 insertions(+), 29 deletions(-) (limited to 'net/core/sock.c') diff --git a/include/net/sock.h b/include/net/sock.h index 3467d9e89e7d..858891c36f94 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1080,6 +1080,7 @@ struct proto { bool (*stream_memory_free)(const struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); + void (*leave_memory_pressure)(struct sock *sk); atomic_long_t *memory_allocated; /* Current allocated memory. */ struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* @@ -1088,7 +1089,7 @@ struct proto { * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ - int *memory_pressure; + unsigned long *memory_pressure; long *sysctl_mem; int *sysctl_wmem; int *sysctl_rmem; @@ -1193,25 +1194,6 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) return !!*sk->sk_prot->memory_pressure; } -static inline void sk_leave_memory_pressure(struct sock *sk) -{ - int *memory_pressure = sk->sk_prot->memory_pressure; - - if (!memory_pressure) - return; - - if (*memory_pressure) - *memory_pressure = 0; -} - -static inline void sk_enter_memory_pressure(struct sock *sk) -{ - if (!sk->sk_prot->enter_memory_pressure) - return; - - sk->sk_prot->enter_memory_pressure(sk); -} - static inline long sk_memory_allocated(const struct sock *sk) { diff --git a/include/net/tcp.h b/include/net/tcp.h index aec092560d9b..3ab677d11d02 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -276,7 +276,7 @@ extern int sysctl_tcp_pacing_ca_ratio; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; -extern int tcp_memory_pressure; +extern unsigned long tcp_memory_pressure; /* optimized version of sk_under_memory_pressure() for TCP sockets */ static inline bool tcp_under_memory_pressure(const struct sock *sk) @@ -1320,6 +1320,7 @@ extern void tcp_openreq_init_rwin(struct request_sock *req, const struct dst_entry *dst); void tcp_enter_memory_pressure(struct sock *sk); +void tcp_leave_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 95cffcb21dfd..d85693295798 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -228,6 +228,7 @@ enum LINUX_MIB_TCPABORTONLINGER, /* TCPAbortOnLinger */ LINUX_MIB_TCPABORTFAILED, /* TCPAbortFailed */ LINUX_MIB_TCPMEMORYPRESSURES, /* TCPMemoryPressures */ + LINUX_MIB_TCPMEMORYPRESSURESCHRONO, /* TCPMemoryPressuresChrono */ LINUX_MIB_TCPSACKDISCARD, /* TCPSACKDiscard */ LINUX_MIB_TCPDSACKIGNOREDOLD, /* TCPSACKIgnoredOld */ LINUX_MIB_TCPDSACKIGNOREDNOUNDO, /* TCPSACKIgnoredNoUndo */ diff --git a/net/core/sock.c b/net/core/sock.c index bef844127e01..ad8a4bc84126 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2076,6 +2076,26 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, } EXPORT_SYMBOL(sock_cmsg_send); +static void sk_enter_memory_pressure(struct sock *sk) +{ + if (!sk->sk_prot->enter_memory_pressure) + return; + + sk->sk_prot->enter_memory_pressure(sk); +} + +static void sk_leave_memory_pressure(struct sock *sk) +{ + if (sk->sk_prot->leave_memory_pressure) { + sk->sk_prot->leave_memory_pressure(sk); + } else { + unsigned long *memory_pressure = sk->sk_prot->memory_pressure; + + if (memory_pressure && *memory_pressure) + *memory_pressure = 0; + } +} + /* On 32bit arches, an skb frag is limited to 2^15 */ #define SKB_FRAG_PAGE_ORDER get_order(32768) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 405483a07efc..73a0399dc7a2 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -447,7 +447,7 @@ static void dn_destruct(struct sock *sk) dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); } -static int dn_memory_pressure; +static unsigned long dn_memory_pressure; static void dn_enter_memory_pressure(struct sock *sk) { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index fa44e752a9a3..43eb6567b3a0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -250,6 +250,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPAbortOnLinger", LINUX_MIB_TCPABORTONLINGER), SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED), SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES), + SNMP_MIB_ITEM("TCPMemoryPressuresChrono", LINUX_MIB_TCPMEMORYPRESSURESCHRONO), SNMP_MIB_ITEM("TCPSACKDiscard", LINUX_MIB_TCPSACKDISCARD), SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 87981fcdfcf2..cc8fd8b747a4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -320,17 +320,36 @@ struct tcp_splice_state { * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ -int tcp_memory_pressure __read_mostly; -EXPORT_SYMBOL(tcp_memory_pressure); +unsigned long tcp_memory_pressure __read_mostly; +EXPORT_SYMBOL_GPL(tcp_memory_pressure); void tcp_enter_memory_pressure(struct sock *sk) { - if (!tcp_memory_pressure) { + unsigned long val; + + if (tcp_memory_pressure) + return; + val = jiffies; + + if (!val) + val--; + if (!cmpxchg(&tcp_memory_pressure, 0, val)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES); - tcp_memory_pressure = 1; - } } -EXPORT_SYMBOL(tcp_enter_memory_pressure); +EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure); + +void tcp_leave_memory_pressure(struct sock *sk) +{ + unsigned long val; + + if (!tcp_memory_pressure) + return; + val = xchg(&tcp_memory_pressure, 0); + if (val) + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO, + jiffies_to_msecs(jiffies - val)); +} +EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ static u8 secs_to_retrans(int seconds, int timeout, int rto_max) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 13c7ae7d4504..1dc8c449e16a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2387,6 +2387,7 @@ struct proto tcp_prot = { .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, + .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .orphan_count = &tcp_orphan_count, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a525426fe93..0840543fc245 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1910,6 +1910,7 @@ struct proto tcpv6_prot = { .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, + .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0822046e4f3f..5f58dd03e3ac 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -103,7 +103,7 @@ static int sctp_autobind(struct sock *sk); static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); -static int sctp_memory_pressure; +static unsigned long sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; struct percpu_counter sctp_sockets_allocated; -- cgit v1.2.3-59-g8ed1b From 28b5ba2aa0f55d80adb2624564ed2b170c19519e Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 21 Jun 2017 10:47:15 +0200 Subject: net: introduce SO_PEERGROUPS getsockopt This adds the new getsockopt(2) option SO_PEERGROUPS on SOL_SOCKET to retrieve the auxiliary groups of the remote peer. It is designed to naturally extend SO_PEERCRED. That is, the underlying data is from the same credentials. Regarding its syntax, it is based on SO_PEERSEC. That is, if the provided buffer is too small, ERANGE is returned and @optlen is updated. Otherwise, the information is copied, @optlen is set to the actual size, and 0 is returned. While SO_PEERCRED (and thus `struct ucred') already returns the primary group, it lacks the auxiliary group vector. However, nearly all access controls (including kernel side VFS and SYSVIPC, but also user-space polkit, DBus, ...) consider the entire set of groups, rather than just the primary group. But this is currently not possible with pure SO_PEERCRED. Instead, user-space has to work around this and query the system database for the auxiliary groups of a UID retrieved via SO_PEERCRED. Unfortunately, there is no race-free way to query the auxiliary groups of the PID/UID retrieved via SO_PEERCRED. Hence, the current user-space solution is to use getgrouplist(3p), which itself falls back to NSS and whatever is configured in nsswitch.conf(3). This effectively checks which groups we *would* assign to the user if it logged in *now*. On normal systems it is as easy as reading /etc/group, but with NSS it can resort to quering network databases (eg., LDAP), using IPC or network communication. Long story short: Whenever we want to use auxiliary groups for access checks on IPC, we need further IPC to talk to the user/group databases, rather than just relying on SO_PEERCRED and the incoming socket. This is unfortunate, and might even result in dead-locks if the database query uses the same IPC as the original request. So far, those recursions / dead-locks have been avoided by using primitive IPC for all crucial NSS modules. However, we want to avoid re-inventing the wheel for each NSS module that might be involved in user/group queries. Hence, we would preferably make DBus (and other IPC that supports access-management based on groups) work without resorting to the user/group database. This new SO_PEERGROUPS ioctl would allow us to make dbus-daemon work without ever calling into NSS. Cc: Michal Sekletar Cc: Simon McVittie Reviewed-by: Tom Gundersen Signed-off-by: David Herrmann Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/frv/include/uapi/asm/socket.h | 2 ++ arch/ia64/include/uapi/asm/socket.h | 2 ++ arch/m32r/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/mn10300/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/s390/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ arch/xtensa/include/uapi/asm/socket.h | 2 ++ include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 33 +++++++++++++++++++++++++++++++++ 12 files changed, 55 insertions(+) (limited to 'net/core/sock.c') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 0926de63a62b..7b285dd4fe05 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -107,4 +107,6 @@ #define SCM_TIMESTAMPING_PKTINFO 58 +#define SO_PEERGROUPS 59 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index e491ff08b9a9..f1e3b20dce9f 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -100,5 +100,7 @@ #define SCM_TIMESTAMPING_PKTINFO 58 +#define SO_PEERGROUPS 59 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 869372413333..5dd5c5d0d642 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -109,4 +109,6 @@ #define SCM_TIMESTAMPING_PKTINFO 58 +#define SO_PEERGROUPS 59 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 5d97890a8704..f8f7b47e247f 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -100,4 +100,6 @@ #define SCM_TIMESTAMPING_PKTINFO 58 +#define SO_PEERGROUPS 59 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 365ff51f033a..882823bec153 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -118,4 +118,6 @@ #define SCM_TIMESTAMPING_PKTINFO 58 +#define SO_PEERGROUPS 59 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index d013c0da0256..c710db354ff2 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -100,4 +100,6 @@ #define SCM_TIMESTAMPING_PKTINFO 58 +#define SO_PEERGROUPS 59 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index b893ca14fade..a0d4dc9f4eb2 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -99,4 +99,6 @@ #define SCM_TIMESTAMPING_PKTINFO 0x4033 +#define SO_PEERGROUPS 0x4034 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index fb9769d7e74e..52a63f4175cb 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -106,4 +106,6 @@ #define SCM_TIMESTAMPING_PKTINFO 58 +#define SO_PEERGROUPS 59 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 5d673302fd41..186fd8199f54 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -96,6 +96,8 @@ #define SCM_TIMESTAMPING_PKTINFO 0x003c +#define SO_PEERGROUPS 0x003d + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 982c2533f912..3eed2761c149 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -111,4 +111,6 @@ #define SCM_TIMESTAMPING_PKTINFO 58 +#define SO_PEERGROUPS 59 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index a5f6e819fafd..9861be8da65e 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -102,4 +102,6 @@ #define SCM_TIMESTAMPING_PKTINFO 58 +#define SO_PEERGROUPS 59 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/sock.c b/net/core/sock.c index ad8a4bc84126..6f4b090241c1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1078,6 +1078,18 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred, } } +static int groups_to_user(gid_t __user *dst, const struct group_info *src) +{ + struct user_namespace *user_ns = current_user_ns(); + int i; + + for (i = 0; i < src->ngroups; i++) + if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i)) + return -EFAULT; + + return 0; +} + int sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -1231,6 +1243,27 @@ int sock_getsockopt(struct socket *sock, int level, int optname, goto lenout; } + case SO_PEERGROUPS: + { + int ret, n; + + if (!sk->sk_peer_cred) + return -ENODATA; + + n = sk->sk_peer_cred->group_info->ngroups; + if (len < n * sizeof(gid_t)) { + len = n * sizeof(gid_t); + return put_user(len, optlen) ? -EFAULT : -ERANGE; + } + len = n * sizeof(gid_t); + + ret = groups_to_user((gid_t __user *)optval, + sk->sk_peer_cred->group_info); + if (ret) + return ret; + goto lenout; + } + case SO_PEERNAME: { char address[128]; -- cgit v1.2.3-59-g8ed1b From 14afee4b6092fde451ee17604e5f5c89da33e71e Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Fri, 30 Jun 2017 13:08:00 +0300 Subject: net: convert sock.sk_wmem_alloc from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. Miller --- drivers/atm/fore200e.c | 12 +----------- drivers/atm/he.c | 2 +- drivers/atm/idt77252.c | 4 ++-- include/linux/atmdev.h | 2 +- include/net/sock.h | 8 ++++---- net/atm/br2684.c | 2 +- net/atm/clip.c | 2 +- net/atm/common.c | 10 +++++----- net/atm/lec.c | 4 ++-- net/atm/mpc.c | 4 ++-- net/atm/pppoatm.c | 2 +- net/atm/raw.c | 2 +- net/atm/signaling.c | 2 +- net/caif/caif_socket.c | 2 +- net/core/datagram.c | 2 +- net/core/skbuff.c | 2 +- net/core/sock.c | 26 +++++++++++++------------- net/ipv4/af_inet.c | 2 +- net/ipv4/esp4.c | 2 +- net/ipv4/ip_output.c | 6 +++--- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_offload.c | 2 +- net/ipv4/tcp_output.c | 15 +++++++-------- net/ipv6/esp6.c | 2 +- net/ipv6/ip6_output.c | 4 ++-- net/kcm/kcmproc.c | 2 +- net/key/af_key.c | 2 +- net/netlink/af_netlink.c | 2 +- net/packet/af_packet.c | 4 ++-- net/phonet/socket.c | 2 +- net/rds/tcp_send.c | 2 +- net/rxrpc/af_rxrpc.c | 4 ++-- net/sched/sch_atm.c | 2 +- net/sctp/output.c | 2 +- net/sctp/proc.c | 2 +- net/sctp/socket.c | 4 ++-- net/unix/af_unix.c | 6 +++--- 37 files changed, 74 insertions(+), 85 deletions(-) (limited to 'net/core/sock.c') diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 7584ae1ded85..f0433adcd8fc 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -924,12 +924,7 @@ fore200e_tx_irq(struct fore200e* fore200e) else { dev_kfree_skb_any(entry->skb); } -#if 1 - /* race fixed by the above incarnation mechanism, but... */ - if (atomic_read(&sk_atm(vcc)->sk_wmem_alloc) < 0) { - atomic_set(&sk_atm(vcc)->sk_wmem_alloc, 0); - } -#endif + /* check error condition */ if (*entry->status & STATUS_ERROR) atomic_inc(&vcc->stats->tx_err); @@ -1130,13 +1125,9 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp return -ENOMEM; } - ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0); - vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); - ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0); - return 0; } @@ -1572,7 +1563,6 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) unsigned long flags; ASSERT(vcc); - ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0); ASSERT(fore200e); ASSERT(fore200e_vcc); diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 461da2bce8ef..37ee21c5a5ca 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -2395,7 +2395,7 @@ he_close(struct atm_vcc *vcc) * TBRQ, the host issues the close command to the adapter. */ - while (((tx_inuse = atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) && + while (((tx_inuse = refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) && (retry < MAX_RETRY)) { msleep(sleep); if (sleep < 250) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 4e64de380bda..60bacba03d17 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -724,7 +724,7 @@ push_on_scq(struct idt77252_dev *card, struct vc_map *vc, struct sk_buff *skb) struct sock *sk = sk_atm(vcc); vc->estimator->cells += (skb->len + 47) / 48; - if (atomic_read(&sk->sk_wmem_alloc) > + if (refcount_read(&sk->sk_wmem_alloc) > (sk->sk_sndbuf >> 1)) { u32 cps = vc->estimator->maxcps; @@ -2009,7 +2009,7 @@ idt77252_send_oam(struct atm_vcc *vcc, void *cell, int flags) atomic_inc(&vcc->stats->tx_err); return -ENOMEM; } - atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); skb_put_data(skb, cell, 52); diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index c1da539f5e28..4d97a89da066 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -254,7 +254,7 @@ static inline void atm_return(struct atm_vcc *vcc,int truesize) static inline int atm_may_send(struct atm_vcc *vcc,unsigned int size) { - return (size + atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) < + return (size + refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) < sk_atm(vcc)->sk_sndbuf; } diff --git a/include/net/sock.h b/include/net/sock.h index 00d09140e354..5284e50fc81a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -390,7 +390,7 @@ struct sock { /* ===== cache line for TX ===== */ int sk_wmem_queued; - atomic_t sk_wmem_alloc; + refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; struct sk_buff *sk_send_head; struct sk_buff_head sk_write_queue; @@ -1911,7 +1911,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro */ static inline int sk_wmem_alloc_get(const struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) - 1; + return refcount_read(&sk->sk_wmem_alloc) - 1; } /** @@ -2055,7 +2055,7 @@ static inline unsigned long sock_wspace(struct sock *sk) int amt = 0; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { - amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc); if (amt < 0) amt = 0; } @@ -2136,7 +2136,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); */ static inline bool sock_writeable(const struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); + return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); } static inline gfp_t gfp_any(void) diff --git a/net/atm/br2684.c b/net/atm/br2684.c index fca84e111c89..4e111196f902 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -252,7 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev, ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); - atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = atmvcc->atm_options; dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; diff --git a/net/atm/clip.c b/net/atm/clip.c index 47c36f449df0..f271a7bcf5b2 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -381,7 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, memcpy(here, llc_oui, sizeof(llc_oui)); ((__be16 *) here)[3] = skb->protocol; } - atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = vcc->atm_options; entry->vccs->last_use = jiffies; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); diff --git a/net/atm/common.c b/net/atm/common.c index f06422f4108d..8a4f99114cd2 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -80,9 +80,9 @@ static void vcc_sock_destruct(struct sock *sk) printk(KERN_DEBUG "%s: rmem leakage (%d bytes) detected.\n", __func__, atomic_read(&sk->sk_rmem_alloc)); - if (atomic_read(&sk->sk_wmem_alloc)) + if (refcount_read(&sk->sk_wmem_alloc)) printk(KERN_DEBUG "%s: wmem leakage (%d bytes) detected.\n", - __func__, atomic_read(&sk->sk_wmem_alloc)); + __func__, refcount_read(&sk->sk_wmem_alloc)); } static void vcc_def_wakeup(struct sock *sk) @@ -101,7 +101,7 @@ static inline int vcc_writable(struct sock *sk) struct atm_vcc *vcc = atm_sk(sk); return (vcc->qos.txtp.max_sdu + - atomic_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf; + refcount_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf; } static void vcc_write_space(struct sock *sk) @@ -156,7 +156,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family, i memset(&vcc->local, 0, sizeof(struct sockaddr_atmsvc)); memset(&vcc->remote, 0, sizeof(struct sockaddr_atmsvc)); vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */ - atomic_set(&sk->sk_wmem_alloc, 1); + refcount_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_rmem_alloc, 0); vcc->push = NULL; vcc->pop = NULL; @@ -630,7 +630,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) goto out; } pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); - atomic_add(skb->truesize, &sk->sk_wmem_alloc); + refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb->dev = NULL; /* for paths shared with net_device interfaces */ ATM_SKB(skb)->atm_options = vcc->atm_options; diff --git a/net/atm/lec.c b/net/atm/lec.c index 09cfe87f0a44..75545717fa46 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -181,7 +181,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb) ATM_SKB(skb)->vcc = vcc; ATM_SKB(skb)->atm_options = vcc->atm_options; - atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); if (vcc->send(vcc, skb) < 0) { dev->stats.tx_dropped++; return; @@ -345,7 +345,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) int i; char *tmp; /* FIXME */ - atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc)); mesg = (struct atmlec_msg *)skb->data; tmp = skb->data; tmp += sizeof(struct atmlec_msg); diff --git a/net/atm/mpc.c b/net/atm/mpc.c index a190800572bd..680a4b9095a1 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -555,7 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) sizeof(struct llc_snap_hdr)); } - atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = entry->shortcut->atm_options; entry->shortcut->send(entry->shortcut, skb); entry->packets_fwded++; @@ -911,7 +911,7 @@ static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb) struct mpoa_client *mpc = find_mpc_by_vcc(vcc); struct k_message *mesg = (struct k_message *)skb->data; - atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc)); if (mpc == NULL) { pr_info("no mpc found\n"); diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index c4e09846d1de..21d9d341a619 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -350,7 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) return 1; } - atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); diff --git a/net/atm/raw.c b/net/atm/raw.c index 2e17e97a7a8b..821c0797553d 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -35,7 +35,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("(%d) %d -= %d\n", vcc->vci, sk_wmem_alloc_get(sk), skb->truesize); - atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); dev_kfree_skb_any(skb); sk->sk_write_space(sk); } diff --git a/net/atm/signaling.c b/net/atm/signaling.c index f640a99e14b8..983c3a21a133 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -67,7 +67,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb) struct sock *sk; msg = (struct atmsvc_msg *) skb->data; - atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc)); vcc = *(struct atm_vcc **) &msg->vcc; pr_debug("%d (0x%lx)\n", (int)msg->type, (unsigned long)vcc); sk = sk_atm(vcc); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 7506b853a84d..632d5a416d97 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1013,7 +1013,7 @@ static const struct proto_ops caif_stream_ops = { static void caif_sock_destructor(struct sock *sk) { struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - caif_assert(!atomic_read(&sk->sk_wmem_alloc)); + caif_assert(!refcount_read(&sk->sk_wmem_alloc)); caif_assert(sk_unhashed(sk)); caif_assert(!sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { diff --git a/net/core/datagram.c b/net/core/datagram.c index 95d43543ac91..454ec8923333 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -614,7 +614,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) skb->data_len += copied; skb->len += copied; skb->truesize += truesize; - atomic_add(truesize, &skb->sk->sk_wmem_alloc); + refcount_add(truesize, &skb->sk->sk_wmem_alloc); while (copied) { int size = min_t(int, copied, PAGE_SIZE - start); skb_fill_page_desc(skb, frag++, pages[n], start, size); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 659dfc0494c5..c267713cd383 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3024,7 +3024,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, get_page(pfrag->page); skb->truesize += copy; - atomic_add(copy, &sk->sk_wmem_alloc); + refcount_add(copy, &sk->sk_wmem_alloc); skb->len += copy; skb->data_len += copy; offset += copy; diff --git a/net/core/sock.c b/net/core/sock.c index 6f4b090241c1..0866d59489cb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1528,7 +1528,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, if (likely(sk->sk_net_refcnt)) get_net(net); sock_net_set(sk, net); - atomic_set(&sk->sk_wmem_alloc, 1); + refcount_set(&sk->sk_wmem_alloc, 1); mem_cgroup_sk_alloc(sk); cgroup_sk_alloc(&sk->sk_cgrp_data); @@ -1552,7 +1552,7 @@ static void __sk_destruct(struct rcu_head *head) sk->sk_destruct(sk); filter = rcu_dereference_check(sk->sk_filter, - atomic_read(&sk->sk_wmem_alloc) == 0); + refcount_read(&sk->sk_wmem_alloc) == 0); if (filter) { sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); @@ -1602,7 +1602,7 @@ void sk_free(struct sock *sk) * some packets are still in some tx queue. * If not null, sock_wfree() will call __sk_free(sk) later */ - if (atomic_dec_and_test(&sk->sk_wmem_alloc)) + if (refcount_dec_and_test(&sk->sk_wmem_alloc)) __sk_free(sk); } EXPORT_SYMBOL(sk_free); @@ -1659,7 +1659,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */ - atomic_set(&newsk->sk_wmem_alloc, 1); + refcount_set(&newsk->sk_wmem_alloc, 1); atomic_set(&newsk->sk_omem_alloc, 0); sk_init_common(newsk); @@ -1787,7 +1787,7 @@ void sock_wfree(struct sk_buff *skb) * Keep a reference on sk_wmem_alloc, this will be released * after sk_write_space() call */ - atomic_sub(len - 1, &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc)); sk->sk_write_space(sk); len = 1; } @@ -1795,7 +1795,7 @@ void sock_wfree(struct sk_buff *skb) * if sk_wmem_alloc reaches 0, we must finish what sk_free() * could not do because of in-flight packets */ - if (atomic_sub_and_test(len, &sk->sk_wmem_alloc)) + if (refcount_sub_and_test(len, &sk->sk_wmem_alloc)) __sk_free(sk); } EXPORT_SYMBOL(sock_wfree); @@ -1807,7 +1807,7 @@ void __sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; - if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) + if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) __sk_free(sk); } @@ -1829,7 +1829,7 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) * is enough to guarantee sk_free() wont free this sock until * all in-flight packets are completed */ - atomic_add(skb->truesize, &sk->sk_wmem_alloc); + refcount_add(skb->truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(skb_set_owner_w); @@ -1852,7 +1852,7 @@ void skb_orphan_partial(struct sk_buff *skb) struct sock *sk = skb->sk; if (atomic_inc_not_zero(&sk->sk_refcnt)) { - atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); skb->destructor = sock_efree; } } else { @@ -1912,7 +1912,7 @@ EXPORT_SYMBOL(sock_i_ino); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority) { - if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { + if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { struct sk_buff *skb = alloc_skb(size, priority); if (skb) { skb_set_owner_w(skb, sk); @@ -1987,7 +1987,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) + if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) break; if (sk->sk_shutdown & SEND_SHUTDOWN) break; @@ -2310,7 +2310,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) if (sk->sk_type == SOCK_STREAM) { if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) return 1; - } else if (atomic_read(&sk->sk_wmem_alloc) < + } else if (refcount_read(&sk->sk_wmem_alloc) < prot->sysctl_wmem[0]) return 1; } @@ -2577,7 +2577,7 @@ static void sock_def_write_space(struct sock *sk) /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 58925b6597de..76c2077c3f5b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(sk->sk_wmem_queued); WARN_ON(sk->sk_forward_alloc); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1f18b4650253..0cbee0a666ff 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -307,7 +307,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * skb->data_len += tailen; skb->truesize += tailen; if (sk) - atomic_add(tailen, &sk->sk_wmem_alloc); + refcount_add(tailen, &sk->sk_wmem_alloc); goto out; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 532b36e9ce2a..2e61e2af251a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1037,7 +1037,7 @@ alloc_new_skb: (flags & MSG_DONTWAIT), &err); } else { skb = NULL; - if (atomic_read(&sk->sk_wmem_alloc) <= + if (refcount_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len + 15, 1, @@ -1145,7 +1145,7 @@ alloc_new_skb: skb->len += copy; skb->data_len += copy; skb->truesize += copy; - atomic_add(copy, &sk->sk_wmem_alloc); + refcount_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; @@ -1369,7 +1369,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, skb->len += len; skb->data_len += len; skb->truesize += len; - atomic_add(len, &sk->sk_wmem_alloc); + refcount_add(len, &sk->sk_wmem_alloc); offset += len; size -= len; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4793fb78d93b..fae45e402742 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -664,7 +664,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, return skb->len < size_goal && sysctl_tcp_autocorking && skb != tcp_write_queue_head(sk) && - atomic_read(&sk->sk_wmem_alloc) > skb->truesize; + refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } static void tcp_push(struct sock *sk, int flags, int mss_now, @@ -692,7 +692,7 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, /* It is possible TX completion already happened * before we set TSQ_THROTTLED. */ - if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize) + if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize) return; } diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index bc68da38ea86..11f69bbf9307 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -152,7 +152,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); sum_truesize += skb->truesize; - atomic_add(sum_truesize - gso_skb->truesize, + refcount_add(sum_truesize - gso_skb->truesize, &skb->sk->sk_wmem_alloc); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9a9c395b6235..1d79137f3795 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -861,12 +861,11 @@ void tcp_wfree(struct sk_buff *skb) struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); unsigned long flags, nval, oval; - int wmem; /* Keep one reference on sk_wmem_alloc. * Will be released by sk_free() from here or tcp_tasklet_func() */ - wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc)); /* If this softirq is serviced by ksoftirqd, we are likely under stress. * Wait until our queues (qdisc + devices) are drained. @@ -875,7 +874,7 @@ void tcp_wfree(struct sk_buff *skb) * - chance for incoming ACK (processed by another cpu maybe) * to migrate this flow (skb->ooo_okay will be eventually set) */ - if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) + if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) goto out; for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) { @@ -925,7 +924,7 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer) if (nval != oval) continue; - if (!atomic_inc_not_zero(&sk->sk_wmem_alloc)) + if (!refcount_inc_not_zero(&sk->sk_wmem_alloc)) break; /* queue this socket to tasklet queue */ tsq = this_cpu_ptr(&tsq_tasklet); @@ -1045,7 +1044,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb->sk = sk; skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; skb_set_hash_from_sk(skb, sk); - atomic_add(skb->truesize, &sk->sk_wmem_alloc); + refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); @@ -2176,7 +2175,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); limit <<= factor; - if (atomic_read(&sk->sk_wmem_alloc) > limit) { + if (refcount_read(&sk->sk_wmem_alloc) > limit) { /* Always send the 1st or 2nd skb in write queue. * No need to wait for TX completion to call us back, * after softirq/tasklet schedule. @@ -2192,7 +2191,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, * test again the condition. */ smp_mb__after_atomic(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) + if (refcount_read(&sk->sk_wmem_alloc) > limit) return true; } return false; @@ -2812,7 +2811,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) /* Do not sent more than we queued. 1/4 is reserved for possible * copying overhead: fragmentation, tunneling, mangling etc. */ - if (atomic_read(&sk->sk_wmem_alloc) > + if (refcount_read(&sk->sk_wmem_alloc) > min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) return -EAGAIN; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 71faffdd55d9..9ed35473dcb5 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -275,7 +275,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info skb->data_len += tailen; skb->truesize += tailen; if (sk) - atomic_add(tailen, &sk->sk_wmem_alloc); + refcount_add(tailen, &sk->sk_wmem_alloc); goto out; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5f2657d98e82..1422d6c08377 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1472,7 +1472,7 @@ alloc_new_skb: (flags & MSG_DONTWAIT), &err); } else { skb = NULL; - if (atomic_read(&sk->sk_wmem_alloc) <= + if (refcount_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len, 1, @@ -1581,7 +1581,7 @@ alloc_new_skb: skb->len += copy; skb->data_len += copy; skb->truesize += copy; - atomic_add(copy, &sk->sk_wmem_alloc); + refcount_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index bf75c9231cca..c343ac60bf50 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -162,7 +162,7 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, psock->sk->sk_receive_queue.qlen, atomic_read(&psock->sk->sk_rmem_alloc), psock->sk->sk_write_queue.qlen, - atomic_read(&psock->sk->sk_wmem_alloc)); + refcount_read(&psock->sk->sk_wmem_alloc)); if (psock->done) seq_puts(seq, "Done "); diff --git a/net/key/af_key.c b/net/key/af_key.c index 287964a570e9..e466579c18fa 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -109,7 +109,7 @@ static void pfkey_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); atomic_dec(&net_pfkey->socks_nr); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 05030ad1a36c..8ced52e91181 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -372,7 +372,7 @@ static void netlink_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(nlk_sk(sk)->groups); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f9349a495caf..90fd38d5c458 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1317,7 +1317,7 @@ static void packet_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_error_queue); WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive packet socket: %p\n", sk); @@ -2523,7 +2523,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->data_len = to_write; skb->len += to_write; skb->truesize += to_write; - atomic_add(to_write, &po->sk.sk_wmem_alloc); + refcount_add(to_write, &po->sk.sk_wmem_alloc); while (likely(to_write)) { nr_frags = skb_shinfo(skb)->nr_frags; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 64634e3ec2fc..29c7f754c70d 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -360,7 +360,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, return POLLHUP; if (sk->sk_state == TCP_ESTABLISHED && - atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf && + refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf && atomic_read(&pn->tx_credits)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 52d11d7725c8..0d8616aa5bad 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -202,7 +202,7 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked); - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) queue_delayed_work(rds_wq, &cp->cp_send_w, 0); out: diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 58ae0db52ea1..f1299f54627a 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -53,7 +53,7 @@ static void rxrpc_sock_destructor(struct sock *); */ static inline int rxrpc_writable(struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf; + return refcount_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf; } /* @@ -730,7 +730,7 @@ static void rxrpc_sock_destructor(struct sock *sk) rxrpc_purge_queue(&sk->sk_receive_queue); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index de162592eee0..572fe2584e48 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -498,7 +498,7 @@ static void sch_atm_dequeue(unsigned long data) ATM_SKB(skb)->vcc = flow->vcc; memcpy(skb_push(skb, flow->hdr_len), flow->hdr, flow->hdr_len); - atomic_add(skb->truesize, + refcount_add(skb->truesize, &sk_atm(flow->vcc)->sk_wmem_alloc); /* atm.atm_options are already set by atm_tc_enqueue */ flow->vcc->send(flow->vcc, skb); diff --git a/net/sctp/output.c b/net/sctp/output.c index 89cee1482d35..9bf9d84a96b7 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -402,7 +402,7 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) * therefore only reserve a single byte to keep socket around until * the packet has been transmitted. */ - atomic_inc(&sk->sk_wmem_alloc); + refcount_inc(&sk->sk_wmem_alloc); } static int sctp_packet_pack(struct sctp_packet *packet, diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 8e34db56bc1d..26b4be6b4172 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -363,7 +363,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) assoc->stream.outcnt, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, assoc->rtx_data_chunks, - atomic_read(&sk->sk_wmem_alloc), + refcount_read(&sk->sk_wmem_alloc), sk->sk_wmem_queued, sk->sk_sndbuf, sk->sk_rcvbuf); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b497ee8ae279..15401d09efc4 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -164,7 +164,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) sizeof(struct sk_buff) + sizeof(struct sctp_chunk); - atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); sk->sk_wmem_queued += chunk->skb->truesize; sk_mem_charge(sk, chunk->skb->truesize); } @@ -7684,7 +7684,7 @@ static void sctp_wfree(struct sk_buff *skb) sizeof(struct sk_buff) + sizeof(struct sctp_chunk); - atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc)); /* * This undoes what is done via sctp_set_owner_w and sk_mem_charge diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1a0c961f4ffe..7c2e21ebbedc 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -442,7 +442,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) static int unix_writable(const struct sock *sk) { return sk->sk_state != TCP_LISTEN && - (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; + (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; } static void unix_write_space(struct sock *sk) @@ -487,7 +487,7 @@ static void unix_sock_destructor(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { @@ -2033,7 +2033,7 @@ alloc_skb: skb->len += size; skb->data_len += size; skb->truesize += size; - atomic_add(size, &sk->sk_wmem_alloc); + refcount_add(size, &sk->sk_wmem_alloc); if (newskb) { err = unix_scm_to_skb(&scm, skb, false); -- cgit v1.2.3-59-g8ed1b From 41c6d650f6537e55a1b53438c646fbc3f49176bf Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Fri, 30 Jun 2017 13:08:01 +0300 Subject: net: convert sock.sk_refcnt from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. This patch uses refcount_inc_not_zero() instead of atomic_inc_not_zero_hint() due to absense of a _hint() version of refcount API. If the hint() version must be used, we might need to revisit API. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. Miller --- crypto/algif_aead.c | 2 +- include/net/inet_hashtables.h | 4 ++-- include/net/request_sock.h | 9 +++++---- include/net/sock.h | 17 +++++++++-------- net/atm/proc.c | 2 +- net/bluetooth/af_bluetooth.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/core/skbuff.c | 6 +++--- net/core/sock.c | 6 +++--- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/inet_hashtables.c | 4 ++-- net/ipv4/inet_timewait_sock.c | 8 ++++---- net/ipv4/ping.c | 4 ++-- net/ipv4/raw.c | 2 +- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_fastopen.c | 2 +- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/udp.c | 6 +++--- net/ipv4/udp_diag.c | 4 ++-- net/ipv6/datagram.c | 2 +- net/ipv6/inet6_hashtables.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- net/ipv6/udp.c | 4 ++-- net/key/af_key.c | 2 +- net/l2tp/l2tp_debugfs.c | 3 +-- net/llc/llc_conn.c | 8 ++++---- net/llc/llc_sap.c | 2 +- net/netfilter/xt_TPROXY.c | 4 ++-- net/netlink/af_netlink.c | 6 +++--- net/packet/af_packet.c | 2 +- net/phonet/socket.c | 2 +- net/rxrpc/af_rxrpc.c | 2 +- net/sched/em_meta.c | 2 +- net/tipc/socket.c | 2 +- net/unix/af_unix.c | 2 +- 35 files changed, 70 insertions(+), 69 deletions(-) (limited to 'net/core/sock.c') diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 8af664f7d27c..be117495eb43 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -877,7 +877,7 @@ static void aead_sock_destruct(struct sock *sk) unsigned int ivlen = crypto_aead_ivsize( crypto_aead_reqtfm(&ctx->aead_req)); - WARN_ON(atomic_read(&sk->sk_refcnt) != 0); + WARN_ON(refcount_read(&sk->sk_refcnt) != 0); aead_put_sgl(sk); sock_kzfree_s(sk, ctx->iv, ivlen); sock_kfree_s(sk, ctx, ctx->len); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1178931288cb..b9e6e0e1f55c 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -32,7 +32,7 @@ #include #include -#include +#include #include /* This is for all connections with a full identity, no wildcards. @@ -334,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net, sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, dport, dif, &refcounted); - if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 53ced67c4ae9..23e22054aa60 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -89,7 +90,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, return NULL; req->rsk_listener = NULL; if (attach_listener) { - if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) { + if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { kmem_cache_free(ops->slab, req); return NULL; } @@ -100,7 +101,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, sk_node_init(&req_to_sk(req)->sk_node); sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; - atomic_set(&req->rsk_refcnt, 0); + refcount_set(&req->rsk_refcnt, 0); return req; } @@ -108,7 +109,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, static inline void reqsk_free(struct request_sock *req) { /* temporary debugging */ - WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0); + WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); req->rsk_ops->destructor(req); if (req->rsk_listener) @@ -119,7 +120,7 @@ static inline void reqsk_free(struct request_sock *req) static inline void reqsk_put(struct request_sock *req) { - if (atomic_dec_and_test(&req->rsk_refcnt)) + if (refcount_dec_and_test(&req->rsk_refcnt)) reqsk_free(req); } diff --git a/include/net/sock.h b/include/net/sock.h index 5284e50fc81a..60200f4f4028 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -66,6 +66,7 @@ #include #include +#include #include #include #include @@ -219,7 +220,7 @@ struct sock_common { u32 skc_tw_rcv_nxt; /* struct tcp_timewait_sock */ }; - atomic_t skc_refcnt; + refcount_t skc_refcnt; /* private: */ int skc_dontcopy_end[0]; union { @@ -611,7 +612,7 @@ static inline bool __sk_del_node_init(struct sock *sk) static __always_inline void sock_hold(struct sock *sk) { - atomic_inc(&sk->sk_refcnt); + refcount_inc(&sk->sk_refcnt); } /* Ungrab socket in the context, which assumes that socket refcnt @@ -619,7 +620,7 @@ static __always_inline void sock_hold(struct sock *sk) */ static __always_inline void __sock_put(struct sock *sk) { - atomic_dec(&sk->sk_refcnt); + refcount_dec(&sk->sk_refcnt); } static inline bool sk_del_node_init(struct sock *sk) @@ -628,7 +629,7 @@ static inline bool sk_del_node_init(struct sock *sk) if (rc) { /* paranoid for a while -acme */ - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; @@ -650,7 +651,7 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk) if (rc) { /* paranoid for a while -acme */ - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; @@ -1144,9 +1145,9 @@ static inline void sk_refcnt_debug_dec(struct sock *sk) static inline void sk_refcnt_debug_release(const struct sock *sk) { - if (atomic_read(&sk->sk_refcnt) != 1) + if (refcount_read(&sk->sk_refcnt) != 1) printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", - sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); + sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt)); } #else /* SOCK_REFCNT_DEBUG */ #define sk_refcnt_debug_inc(sk) do { } while (0) @@ -1636,7 +1637,7 @@ void sock_init_data(struct socket *sock, struct sock *sk); /* Ungrab socket and destroy it, if it was the last reference. */ static inline void sock_put(struct sock *sk) { - if (atomic_dec_and_test(&sk->sk_refcnt)) + if (refcount_dec_and_test(&sk->sk_refcnt)) sk_free(sk); } /* Generic version of sock_put(), dealing with all sockets diff --git a/net/atm/proc.c b/net/atm/proc.c index bbb6461a4b7f..27c9c01c537d 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -211,7 +211,7 @@ static void vcc_info(struct seq_file *seq, struct atm_vcc *vcc) vcc->flags, sk->sk_err, sk_wmem_alloc_get(sk), sk->sk_sndbuf, sk_rmem_alloc_get(sk), sk->sk_rcvbuf, - atomic_read(&sk->sk_refcnt)); + refcount_read(&sk->sk_refcnt)); } static void svc_info(struct seq_file *seq, struct atm_vcc *vcc) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 8a8f77a247e6..91e3ba280706 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -657,7 +657,7 @@ static int bt_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK %-6d %-6u %-6u %-6u %-6lu %-6lu", sk, - atomic_read(&sk->sk_refcnt), + refcount_read(&sk->sk_refcnt), sk_rmem_alloc_get(sk), sk_wmem_alloc_get(sk), from_kuid(seq_user_ns(seq), sock_i_uid(sk)), diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ac3c650cb234..2172ae509cf1 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -197,7 +197,7 @@ static void rfcomm_sock_kill(struct sock *sk) if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) return; - BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, atomic_read(&sk->sk_refcnt)); + BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); /* Kill poor orphan */ bt_sock_unlink(&rfcomm_sk_list, sk); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c267713cd383..8b11341ed69a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3844,7 +3844,7 @@ struct sk_buff *skb_clone_sk(struct sk_buff *skb) struct sock *sk = skb->sk; struct sk_buff *clone; - if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt)) + if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return NULL; clone = skb_clone(skb, GFP_ATOMIC); @@ -3915,7 +3915,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. */ - if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { *skb_hwtstamps(skb) = *hwtstamps; __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); sock_put(sk); @@ -3997,7 +3997,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. */ - if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { err = sock_queue_err_skb(sk, skb); sock_put(sk); } diff --git a/net/core/sock.c b/net/core/sock.c index 0866d59489cb..ba0ef6a7dbaf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1708,7 +1708,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) * (Documentation/RCU/rculist_nulls.txt for details) */ smp_wmb(); - atomic_set(&newsk->sk_refcnt, 2); + refcount_set(&newsk->sk_refcnt, 2); /* * Increment the counter in the same struct proto as the master @@ -1851,7 +1851,7 @@ void skb_orphan_partial(struct sk_buff *skb) ) { struct sock *sk = skb->sk; - if (atomic_inc_not_zero(&sk->sk_refcnt)) { + if (refcount_inc_not_zero(&sk->sk_refcnt)) { WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); skb->destructor = sock_efree; } @@ -2687,7 +2687,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) * (Documentation/RCU/rculist_nulls.txt for details) */ smp_wmb(); - atomic_set(&sk->sk_refcnt, 1); + refcount_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a3fa1a5b6d98..4089c013cb03 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -756,7 +756,7 @@ static void reqsk_queue_hash_req(struct request_sock *req, * are committed to memory and refcnt initialized. */ smp_wmb(); - atomic_set(&req->rsk_refcnt, 2 + 1); + refcount_set(&req->rsk_refcnt, 2 + 1); } void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e9a59d2d91d4..a4be2c1cb688 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -246,7 +246,7 @@ EXPORT_SYMBOL_GPL(__inet_lookup_listener); /* All sockets share common refcount, but have different destructors */ void sock_gen_put(struct sock *sk) { - if (!atomic_dec_and_test(&sk->sk_refcnt)) + if (!refcount_dec_and_test(&sk->sk_refcnt)) return; if (sk->sk_state == TCP_TIME_WAIT) @@ -287,7 +287,7 @@ begin: continue; if (likely(INET_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!INET_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index f8aff2c71cde..5b039159e67a 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -76,7 +76,7 @@ void inet_twsk_free(struct inet_timewait_sock *tw) void inet_twsk_put(struct inet_timewait_sock *tw) { - if (atomic_dec_and_test(&tw->tw_refcnt)) + if (refcount_dec_and_test(&tw->tw_refcnt)) inet_twsk_free(tw); } EXPORT_SYMBOL_GPL(inet_twsk_put); @@ -131,7 +131,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, * We can use atomic_set() because prior spin_lock()/spin_unlock() * committed into memory all tw fields. */ - atomic_set(&tw->tw_refcnt, 4); + refcount_set(&tw->tw_refcnt, 4); inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ @@ -195,7 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, * to a non null value before everything is setup for this * timewait socket. */ - atomic_set(&tw->tw_refcnt, 0); + refcount_set(&tw->tw_refcnt, 0); __module_get(tw->tw_prot->owner); } @@ -278,7 +278,7 @@ restart: atomic_read(&twsk_net(tw)->count)) continue; - if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt))) + if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) continue; if (unlikely((tw->tw_family != family) || diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index ccfbce13a633..b8f0db54b197 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -290,7 +290,7 @@ void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", inet_sk(sk), inet_sk(sk)->inet_num); - pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter); + pr_debug("isk->refcnt = %d\n", refcount_read(&sk->sk_refcnt)); sk_common_release(sk); } @@ -1127,7 +1127,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, 0, 0L, 0, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bdffad875691..b0bb5d0a30bd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -1063,7 +1063,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) 0, 0L, 0, from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } static int raw_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 7835bb4a1fab..0905cf04c2a4 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -213,7 +213,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, NULL, &own_req); if (child) { - atomic_set(&req->rsk_refcnt, 1); + refcount_set(&req->rsk_refcnt, 1); tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); inet_csk_reqsk_queue_add(sk, req, child); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4af82b914dd4..8b1539efaf38 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -214,7 +214,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT, TCP_RTO_MAX); - atomic_set(&req->rsk_refcnt, 2); + refcount_set(&req->rsk_refcnt, 2); /* Now finish processing the fastopen child socket. */ inet_csk(child)->icsk_af_ops->rebuild_header(child); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d774bcd9a54b..6ec6900eb300 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2323,7 +2323,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), icsk->icsk_probes_out, sock_i_ino(sk), - atomic_read(&sk->sk_refcnt), sk, + refcount_read(&sk->sk_refcnt), sk, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, @@ -2349,7 +2349,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK", i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, - atomic_read(&tw->tw_refcnt), tw); + refcount_read(&tw->tw_refcnt), tw); } #define TMPSZ 150 diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 86fad2a14ac4..25294d43e147 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -577,7 +577,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } @@ -2242,7 +2242,7 @@ void udp_v4_early_demux(struct sk_buff *skb) uh->source, iph->saddr, dif); } - if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2)) + if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return; skb->sk = sk; @@ -2691,7 +2691,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, 0, 0L, 0, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 9a89c10a55f0..4515836d2a3a 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -55,7 +55,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_dport, req->id.idiag_if, tbl, NULL); #endif - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; rcu_read_unlock(); err = -ENOENT; @@ -206,7 +206,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, return -EINVAL; } - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; rcu_read_unlock(); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5c786f5ab961..a1f918713006 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -1041,6 +1041,6 @@ void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index d0900918a19e..b13b8f93079d 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -75,7 +75,7 @@ begin: continue; if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) continue; - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { @@ -172,7 +172,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, ntohs(dport), dif, &refcounted); - if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f1a4881d9835..2521690d62d6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1809,7 +1809,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), icsk->icsk_probes_out, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, @@ -1842,7 +1842,7 @@ static void get_timewait6_sock(struct seq_file *seq, dest->s6_addr32[2], dest->s6_addr32[3], destp, tw->tw_substate, 0, 0, 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, - atomic_read(&tw->tw_refcnt), tw); + refcount_read(&tw->tw_refcnt), tw); } static int tcp6_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 319aa8ed9cf8..4a3e65626e8b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -325,7 +325,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } @@ -916,7 +916,7 @@ static void udp_v6_early_demux(struct sk_buff *skb) else return; - if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2)) + if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return; skb->sk = sk; diff --git a/net/key/af_key.c b/net/key/af_key.c index e466579c18fa..edcf1d0f82c8 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3739,7 +3739,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v) else seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6lu\n", s, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), from_kuid_munged(seq_user_ns(f), sock_i_uid(s)), diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index d100aed3d06f..98a005d0d04a 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -144,9 +144,8 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) tunnel->encap == L2TP_ENCAPTYPE_IP ? "IP" : ""); seq_printf(m, " %d sessions, refcnt %d/%d\n", session_count, - tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0, + tunnel->sock ? refcount_read(&tunnel->sock->sk_refcnt) : 0, atomic_read(&tunnel->ref_count)); - seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n", tunnel->debug, atomic_long_read(&tunnel->stats.tx_packets), diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 9b02c13d258b..5e91b47f0d2a 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -507,7 +507,7 @@ again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_estab_match(sap, daddr, laddr, rc)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ - if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_estab_match(sap, daddr, laddr, rc))) { @@ -566,7 +566,7 @@ again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_listener_match(sap, laddr, rc)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ - if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_listener_match(sap, laddr, rc))) { @@ -973,9 +973,9 @@ void llc_sk_free(struct sock *sk) skb_queue_purge(&sk->sk_write_queue); skb_queue_purge(&llc->pdu_unack_q); #ifdef LLC_REFCNT_DEBUG - if (atomic_read(&sk->sk_refcnt) != 1) { + if (refcount_read(&sk->sk_refcnt) != 1) { printk(KERN_DEBUG "Destruction of LLC sock %p delayed in %s, cnt=%d\n", - sk, __func__, atomic_read(&sk->sk_refcnt)); + sk, __func__, refcount_read(&sk->sk_refcnt)); printk(KERN_DEBUG "%d LLC sockets are still alive\n", atomic_read(&llc_sock_nr)); } else { diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 63b6ab056370..d90928f50226 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -329,7 +329,7 @@ again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ - if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_dgram_match(sap, laddr, rc))) { diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index df7f1df00330..d767e35fff6b 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -127,7 +127,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, daddr, dport, in->ifindex); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in @@ -197,7 +197,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, daddr, ntohs(dport), in->ifindex); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8ced52e91181..5acee49db90b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -575,7 +575,7 @@ static void netlink_remove(struct sock *sk) table = &nl_table[sk->sk_protocol]; if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node, netlink_rhashtable_params)) { - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } @@ -691,7 +691,7 @@ static void deferred_put_nlk_sk(struct rcu_head *head) struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); struct sock *sk = &nlk->sk; - if (!atomic_dec_and_test(&sk->sk_refcnt)) + if (!refcount_dec_and_test(&sk->sk_refcnt)) return; if (nlk->cb_running && nlk->cb.done) { @@ -2568,7 +2568,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), nlk->cb_running, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), atomic_read(&s->sk_drops), sock_i_ino(s) ); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 90fd38d5c458..643302b37b48 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4495,7 +4495,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n", s, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), s->sk_type, ntohs(po->num), po->ifindex, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 29c7f754c70d..1b050dd17393 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -614,7 +614,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), - atomic_read(&sk->sk_refcnt), sk, + refcount_read(&sk->sk_refcnt), sk, atomic_read(&sk->sk_drops)); } seq_pad(seq, '\n'); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index f1299f54627a..a2ad4482376f 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -747,7 +747,7 @@ static int rxrpc_release_sock(struct sock *sk) { struct rxrpc_sock *rx = rxrpc_sk(sk); - _enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt)); + _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); /* declare the socket closed for business */ sock_orphan(sk); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index eb0e9bab54c1..d6e97115500b 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -340,7 +340,7 @@ META_COLLECTOR(int_sk_refcnt) *err = -1; return; } - dst->value = atomic_read(&skb->sk->sk_refcnt); + dst->value = refcount_read(&skb->sk->sk_refcnt); } META_COLLECTOR(int_sk_rcvbuf) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1b92b72e812f..101e3597338f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2313,7 +2313,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk) struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7c2e21ebbedc..c88525403d2e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2847,7 +2847,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", s, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), 0, s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, s->sk_type, -- cgit v1.2.3-59-g8ed1b