diff options
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- | include/net/sock.h | 172 |
1 files changed, 119 insertions, 53 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 66a9a90f9558..b32906e1ab55 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -259,10 +259,11 @@ struct bpf_local_storage; * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux + * @sk_rx_dst_ifindex: ifindex for @sk_rx_dst + * @sk_rx_dst_cookie: cookie for @sk_rx_dst * @sk_dst_cache: destination cache * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy - * @sk_rx_skb_cache: cache copy of recently accessed RX skb * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed * @sk_tsq_flags: TCP Small Queues flags @@ -270,6 +271,7 @@ struct bpf_local_storage; * @sk_omem_alloc: "o" is "option" or "other" * @sk_wmem_queued: persistent queue size * @sk_forward_alloc: space allocated forward + * @sk_reserved_mem: space reserved and non-reclaimable for the socket * @sk_napi_id: id of the last napi context to receive data for sk * @sk_ll_usec: usecs to busypoll when there is no data * @sk_allocation: allocation mode @@ -307,6 +309,7 @@ struct bpf_local_storage; * @sk_priority: %SO_PRIORITY setting * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family + * @sk_peer_lock: lock protecting @sk_peer_pid and @sk_peer_cred * @sk_peer_pid: &struct pid for this socket's peer * @sk_peer_cred: %SO_PEERCRED setting * @sk_rcvlowat: %SO_RCVLOWAT setting @@ -328,7 +331,6 @@ struct bpf_local_storage; * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit * @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head] - * @sk_tx_skb_cache: cache copy of recently accessed TX skb * @sk_security: used by security modules * @sk_mark: generic packet mark * @sk_cgrp_data: cgroup data for this cgroup @@ -393,7 +395,6 @@ struct sock { atomic_t sk_drops; int sk_rcvlowat; struct sk_buff_head sk_error_queue; - struct sk_buff *sk_rx_skb_cache; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -412,6 +413,7 @@ struct sock { #define sk_rmem_alloc sk_backlog.rmem_alloc int sk_forward_alloc; + u32 sk_reserved_mem; #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sk_ll_usec; /* ===== mostly read cache line ===== */ @@ -430,6 +432,9 @@ struct sock { struct xfrm_policy __rcu *sk_policy[2]; #endif struct dst_entry *sk_rx_dst; + int sk_rx_dst_ifindex; + u32 sk_rx_dst_cookie; + struct dst_entry __rcu *sk_dst_cache; atomic_t sk_omem_alloc; int sk_sndbuf; @@ -442,7 +447,6 @@ struct sock { struct sk_buff *sk_send_head; struct rb_root tcp_rtx_queue; }; - struct sk_buff *sk_tx_skb_cache; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; @@ -488,8 +492,10 @@ struct sock { u8 sk_prefer_busy_poll; u16 sk_busy_poll_budget; #endif + spinlock_t sk_peer_lock; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; + long sk_rcvtimeo; ktime_t sk_stamp; #if BITS_PER_LONG==32 @@ -1204,13 +1210,16 @@ struct proto { unsigned int inuse_idx; #endif + int (*forward_alloc_get)(const struct sock *sk); + bool (*stream_memory_free)(const struct sock *sk, int wake); - bool (*stream_memory_read)(const struct sock *sk); + bool (*sock_is_readable)(struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk); atomic_long_t *memory_allocated; /* Current allocated memory. */ struct percpu_counter *sockets_allocated; /* Current number of sockets. */ + /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. @@ -1234,7 +1243,7 @@ struct proto { unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ - struct percpu_counter *orphan_count; + unsigned int __percpu *orphan_count; struct request_sock_ops *rsk_prot; struct timewait_sock_ops *twsk_prot; @@ -1288,20 +1297,22 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake)); +static inline int sk_forward_alloc_get(const struct sock *sk) +{ + if (!sk->sk_prot->forward_alloc_get) + return sk->sk_forward_alloc; + + return sk->sk_prot->forward_alloc_get(sk); +} + static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) return false; -#ifdef CONFIG_INET - return sk->sk_prot->stream_memory_free ? - INDIRECT_CALL_1(sk->sk_prot->stream_memory_free, - tcp_stream_memory_free, - sk, wake) : true; -#else return sk->sk_prot->stream_memory_free ? - sk->sk_prot->stream_memory_free(sk, wake) : true; -#endif + INDIRECT_CALL_INET_1(sk->sk_prot->stream_memory_free, + tcp_stream_memory_free, sk, wake) : true; } static inline bool sk_stream_memory_free(const struct sock *sk) @@ -1515,20 +1526,49 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) skb_pfmemalloc(skb); } +static inline int sk_unused_reserved_mem(const struct sock *sk) +{ + int unused_mem; + + if (likely(!sk->sk_reserved_mem)) + return 0; + + unused_mem = sk->sk_reserved_mem - sk->sk_wmem_queued - + atomic_read(&sk->sk_rmem_alloc); + + return unused_mem > 0 ? unused_mem : 0; +} + static inline void sk_mem_reclaim(struct sock *sk) { + int reclaimable; + if (!sk_has_account(sk)) return; - if (sk->sk_forward_alloc >= SK_MEM_QUANTUM) - __sk_mem_reclaim(sk, sk->sk_forward_alloc); + + reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); + + if (reclaimable >= SK_MEM_QUANTUM) + __sk_mem_reclaim(sk, reclaimable); +} + +static inline void sk_mem_reclaim_final(struct sock *sk) +{ + sk->sk_reserved_mem = 0; + sk_mem_reclaim(sk); } static inline void sk_mem_reclaim_partial(struct sock *sk) { + int reclaimable; + if (!sk_has_account(sk)) return; - if (sk->sk_forward_alloc > SK_MEM_QUANTUM) - __sk_mem_reclaim(sk, sk->sk_forward_alloc - 1); + + reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); + + if (reclaimable > SK_MEM_QUANTUM) + __sk_mem_reclaim(sk, reclaimable - 1); } static inline void sk_mem_charge(struct sock *sk, int size) @@ -1538,11 +1578,19 @@ static inline void sk_mem_charge(struct sock *sk, int size) sk->sk_forward_alloc -= size; } +/* the following macros control memory reclaiming in sk_mem_uncharge() + */ +#define SK_RECLAIM_THRESHOLD (1 << 21) +#define SK_RECLAIM_CHUNK (1 << 20) + static inline void sk_mem_uncharge(struct sock *sk, int size) { + int reclaimable; + if (!sk_has_account(sk)) return; sk->sk_forward_alloc += size; + reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); /* Avoid a possible overflow. * TCP send queues can make this happen, if sk_mem_reclaim() @@ -1551,23 +1599,8 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) * If we reach 2 MBytes, reclaim 1 MBytes right now, there is * no need to hold that much forward allocation anyway. */ - if (unlikely(sk->sk_forward_alloc >= 1 << 21)) - __sk_mem_reclaim(sk, 1 << 20); -} - -DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); -static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) -{ - sk_wmem_queued_add(sk, -skb->truesize); - sk_mem_uncharge(sk, skb->truesize); - if (static_branch_unlikely(&tcp_tx_skb_cache_key) && - !sk->sk_tx_skb_cache && !skb_cloned(skb)) { - skb_ext_reset(skb); - skb_zcopy_clear(skb, true); - sk->sk_tx_skb_cache = skb; - return; - } - __kfree_skb(skb); + if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD)) + __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK); } static inline void sock_release_ownership(struct sock *sk) @@ -1623,7 +1656,36 @@ void release_sock(struct sock *sk); SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); +bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); + +/** + * lock_sock_fast - fast version of lock_sock + * @sk: socket + * + * This version should be used for very small section, where process wont block + * return false if fast path is taken: + * + * sk_lock.slock locked, owned = 0, BH disabled + * + * return true if slow path is taken: + * + * sk_lock.slock unlocked, owned = 1, BH enabled + */ +static inline bool lock_sock_fast(struct sock *sk) +{ + /* The sk_lock has mutex_lock() semantics here. */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + + return __lock_sock_fast(sk); +} + +/* fast socket lock variant for caller already holding a [different] socket lock */ +static inline bool lock_sock_fast_nested(struct sock *sk) +{ + mutex_acquire(&sk->sk_lock.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_); + + return __lock_sock_fast(sk); +} /** * unlock_sock_fast - complement of lock_sock_fast @@ -1640,6 +1702,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) release_sock(sk); __release(&sk->sk_lock.slock); } else { + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); spin_unlock_bh(&sk->sk_lock.slock); } } @@ -1856,10 +1919,8 @@ static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) if (skb_rx_queue_recorded(skb)) { u16 rx_queue = skb_get_rx_queue(skb); - if (WARN_ON_ONCE(rx_queue == NO_QUEUE_MAPPING)) - return; - - sk->sk_rx_queue_mapping = rx_queue; + if (unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue)) + WRITE_ONCE(sk->sk_rx_queue_mapping, rx_queue); } #endif } @@ -1867,15 +1928,19 @@ static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) static inline void sk_rx_queue_clear(struct sock *sk) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING - sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING; + WRITE_ONCE(sk->sk_rx_queue_mapping, NO_QUEUE_MAPPING); #endif } static inline int sk_rx_queue_get(const struct sock *sk) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING - if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING) - return sk->sk_rx_queue_mapping; + if (sk) { + int res = READ_ONCE(sk->sk_rx_queue_mapping); + + if (res != NO_QUEUE_MAPPING) + return res; + } #endif return -1; @@ -2355,13 +2420,11 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) return; val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); + val = max_t(u32, val, sk_unused_reserved_mem(sk)); WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF)); } -struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, - bool force_schedule); - /** * sk_page_frag - return an appropriate page_frag * @sk: socket @@ -2575,7 +2638,6 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } -DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from @@ -2587,12 +2649,6 @@ DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); - if (static_branch_unlikely(&tcp_rx_skb_cache_key) && - !sk->sk_rx_skb_cache) { - sk->sk_rx_skb_cache = skb; - skb_orphan(skb); - return; - } __kfree_skb(skb); } @@ -2787,4 +2843,14 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs); int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len); +int sock_get_timeout(long timeo, void *optval, bool old_timeval); +int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, + sockptr_t optval, int optlen, bool old_timeval); + +static inline bool sk_is_readable(struct sock *sk) +{ + if (sk->sk_prot->sock_is_readable) + return sk->sk_prot->sock_is_readable(sk); + return false; +} #endif /* _SOCK_H */ |