aboutsummaryrefslogtreecommitdiffstats
path: root/include/net/sock.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/sock.h')
-rw-r--r--include/net/sock.h336
1 files changed, 203 insertions, 133 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index ff9b508d9c5f..5db02546941c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -161,9 +161,6 @@ typedef __u64 __bitwise __addrpair;
* for struct sock and struct inet_timewait_sock.
*/
struct sock_common {
- /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned
- * address on 64bit arches : cf INET_MATCH()
- */
union {
__addrpair skc_addrpair;
struct {
@@ -292,7 +289,6 @@ struct sk_filter;
* @sk_pacing_shift: scaling factor for TCP Small Queues
* @sk_lingertime: %SO_LINGER l_linger setting
* @sk_backlog: always used with the per-socket spinlock held
- * @defer_list: head of llist storing skbs to be freed
* @sk_callback_lock: used with the callbacks in the end of this struct
* @sk_error_queue: rarely used
* @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt,
@@ -316,6 +312,7 @@ struct sk_filter;
* @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting
* @sk_txhash: computed flow hash for use on transmit
+ * @sk_txrehash: enable TX hash rethink
* @sk_filter: socket filtering instructions
* @sk_timer: sock cleanup timer
* @sk_stamp: time stamp of last packet received
@@ -351,6 +348,7 @@ struct sk_filter;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
+ * @sk_bind2_node: bind node in the bhash2 table
*/
struct sock {
/*
@@ -416,7 +414,6 @@ struct sock {
struct sk_buff *head;
struct sk_buff *tail;
} sk_backlog;
- struct llist_head defer_list;
#define sk_rmem_alloc sk_backlog.rmem_alloc
@@ -491,6 +488,7 @@ struct sock {
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
kuid_t sk_uid;
+ u8 sk_txrehash;
#ifdef CONFIG_NET_RX_BUSY_POLL
u8 sk_prefer_busy_poll;
u16 sk_busy_poll_budget;
@@ -507,7 +505,7 @@ struct sock {
#endif
u16 sk_tsflags;
u8 sk_shutdown;
- u32 sk_tskey;
+ atomic_t sk_tskey;
atomic_t sk_zckey;
u8 sk_clockid;
@@ -540,6 +538,7 @@ struct sock {
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
+ struct hlist_node sk_bind2_node;
};
enum sk_pacing {
@@ -548,14 +547,26 @@ enum sk_pacing {
SK_PACING_FQ = 2,
};
-/* Pointer stored in sk_user_data might not be suitable for copying
- * when cloning the socket. For instance, it can point to a reference
- * counted object. sk_user_data bottom bit is set if pointer must not
- * be copied.
+/* flag bits in sk_user_data
+ *
+ * - SK_USER_DATA_NOCOPY: Pointer stored in sk_user_data might
+ * not be suitable for copying when cloning the socket. For instance,
+ * it can point to a reference counted object. sk_user_data bottom
+ * bit is set if pointer must not be copied.
+ *
+ * - SK_USER_DATA_BPF: Mark whether sk_user_data field is
+ * managed/owned by a BPF reuseport array. This bit should be set
+ * when sk_user_data's sk is added to the bpf's reuseport_array.
+ *
+ * - SK_USER_DATA_PSOCK: Mark whether pointer stored in
+ * sk_user_data points to psock type. This bit should be set
+ * when sk_user_data is assigned to a psock object.
*/
#define SK_USER_DATA_NOCOPY 1UL
-#define SK_USER_DATA_BPF 2UL /* Managed by BPF */
-#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF)
+#define SK_USER_DATA_BPF 2UL
+#define SK_USER_DATA_PSOCK 4UL
+#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\
+ SK_USER_DATA_PSOCK)
/**
* sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied
@@ -568,24 +579,77 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk)
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
+/**
+ * __locked_read_sk_user_data_with_flags - return the pointer
+ * only if argument flags all has been set in sk_user_data. Otherwise
+ * return NULL
+ *
+ * @sk: socket
+ * @flags: flag bits
+ *
+ * The caller must be holding sk->sk_callback_lock.
+ */
+static inline void *
+__locked_read_sk_user_data_with_flags(const struct sock *sk,
+ uintptr_t flags)
+{
+ uintptr_t sk_user_data =
+ (uintptr_t)rcu_dereference_check(__sk_user_data(sk),
+ lockdep_is_held(&sk->sk_callback_lock));
+
+ WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK);
+
+ if ((sk_user_data & flags) == flags)
+ return (void *)(sk_user_data & SK_USER_DATA_PTRMASK);
+ return NULL;
+}
+
+/**
+ * __rcu_dereference_sk_user_data_with_flags - return the pointer
+ * only if argument flags all has been set in sk_user_data. Otherwise
+ * return NULL
+ *
+ * @sk: socket
+ * @flags: flag bits
+ */
+static inline void *
+__rcu_dereference_sk_user_data_with_flags(const struct sock *sk,
+ uintptr_t flags)
+{
+ uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk));
+
+ WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK);
+
+ if ((sk_user_data & flags) == flags)
+ return (void *)(sk_user_data & SK_USER_DATA_PTRMASK);
+ return NULL;
+}
+
#define rcu_dereference_sk_user_data(sk) \
+ __rcu_dereference_sk_user_data_with_flags(sk, 0)
+#define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \
({ \
- void *__tmp = rcu_dereference(__sk_user_data((sk))); \
- (void *)((uintptr_t)__tmp & SK_USER_DATA_PTRMASK); \
-})
-#define rcu_assign_sk_user_data(sk, ptr) \
-({ \
- uintptr_t __tmp = (uintptr_t)(ptr); \
- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \
- rcu_assign_pointer(__sk_user_data((sk)), __tmp); \
-})
-#define rcu_assign_sk_user_data_nocopy(sk, ptr) \
-({ \
- uintptr_t __tmp = (uintptr_t)(ptr); \
- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \
+ uintptr_t __tmp1 = (uintptr_t)(ptr), \
+ __tmp2 = (uintptr_t)(flags); \
+ WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \
+ WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \
rcu_assign_pointer(__sk_user_data((sk)), \
- __tmp | SK_USER_DATA_NOCOPY); \
+ __tmp1 | __tmp2); \
})
+#define rcu_assign_sk_user_data(sk, ptr) \
+ __rcu_assign_sk_user_data_with_flags(sk, ptr, 0)
+
+static inline
+struct net *sock_net(const struct sock *sk)
+{
+ return read_pnet(&sk->sk_net);
+}
+
+static inline
+void sock_net_set(struct sock *sk, struct net *net)
+{
+ write_pnet(&sk->sk_net, net);
+}
/*
* SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK
@@ -600,7 +664,7 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk)
int sk_set_peek_off(struct sock *sk, int val);
-static inline int sk_peek_offset(struct sock *sk, int flags)
+static inline int sk_peek_offset(const struct sock *sk, int flags)
{
if (unlikely(flags & MSG_PEEK)) {
return READ_ONCE(sk->sk_peek_off);
@@ -680,11 +744,6 @@ static inline void sk_node_init(struct hlist_node *node)
node->pprev = NULL;
}
-static inline void sk_nulls_node_init(struct hlist_nulls_node *node)
-{
- node->pprev = NULL;
-}
-
static inline void __sk_del_node(struct sock *sk)
{
__hlist_del(&sk->sk_node);
@@ -808,6 +867,16 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_add_head(&sk->sk_bind_node, list);
}
+static inline void __sk_del_bind2_node(struct sock *sk)
+{
+ __hlist_del(&sk->sk_bind2_node);
+}
+
+static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
+{
+ hlist_add_head(&sk->sk_bind2_node, list);
+}
+
#define sk_for_each(__sk, list) \
hlist_for_each_entry(__sk, list, sk_node)
#define sk_for_each_rcu(__sk, list) \
@@ -825,6 +894,8 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
+#define sk_for_each_bound_bhash2(__sk, list) \
+ hlist_for_each_entry(__sk, list, sk_bind2_node)
/**
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
@@ -840,7 +911,7 @@ static inline void sk_add_bind_node(struct sock *sk,
({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
pos = rcu_dereference(hlist_next_rcu(pos)))
-static inline struct user_namespace *sk_user_ns(struct sock *sk)
+static inline struct user_namespace *sk_user_ns(const struct sock *sk)
{
/* Careful only use this in a context where these parameters
* can not change and must all be valid, such as recvmsg from
@@ -881,11 +952,12 @@ enum sock_flags {
SOCK_TXTIME,
SOCK_XDP, /* XDP is attached */
SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */
+ SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */
};
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
-static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
+static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk)
{
nsk->sk_flags = osk->sk_flags;
}
@@ -1188,8 +1260,7 @@ struct proto {
int (*sendmsg)(struct sock *sk, struct msghdr *msg,
size_t len);
int (*recvmsg)(struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags,
- int *addr_len);
+ size_t len, int flags, int *addr_len);
int (*sendpage)(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
int (*bind)(struct sock *sk,
@@ -1231,6 +1302,7 @@ struct proto {
void (*enter_memory_pressure)(struct sock *sk);
void (*leave_memory_pressure)(struct sock *sk);
atomic_long_t *memory_allocated; /* Current allocated memory. */
+ int __percpu *per_cpu_fw_alloc;
struct percpu_counter *sockets_allocated; /* Current number of sockets. */
/*
@@ -1374,21 +1446,46 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
}
static inline long
-sk_memory_allocated(const struct sock *sk)
+proto_memory_allocated(const struct proto *prot)
{
- return atomic_long_read(sk->sk_prot->memory_allocated);
+ return max(0L, atomic_long_read(prot->memory_allocated));
}
static inline long
+sk_memory_allocated(const struct sock *sk)
+{
+ return proto_memory_allocated(sk->sk_prot);
+}
+
+/* 1 MB per cpu, in page units */
+#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+
+static inline void
sk_memory_allocated_add(struct sock *sk, int amt)
{
- return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
+ int local_reserve;
+
+ preempt_disable();
+ local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+ if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
+ __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+ atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+ }
+ preempt_enable();
}
static inline void
sk_memory_allocated_sub(struct sock *sk, int amt)
{
- atomic_long_sub(amt, sk->sk_prot->memory_allocated);
+ int local_reserve;
+
+ preempt_disable();
+ local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+ if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
+ __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+ atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+ }
+ preempt_enable();
}
#define SK_ALLOC_PERCPU_COUNTER_BATCH 16
@@ -1417,12 +1514,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
return percpu_counter_sum_positive(prot->sockets_allocated);
}
-static inline long
-proto_memory_allocated(struct proto *prot)
-{
- return atomic_long_read(prot->memory_allocated);
-}
-
static inline bool
proto_memory_pressure(struct proto *prot)
{
@@ -1509,30 +1600,18 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind);
void __sk_mem_reduce_allocated(struct sock *sk, int amount);
void __sk_mem_reclaim(struct sock *sk, int amount);
-/* We used to have PAGE_SIZE here, but systems with 64KB pages
- * do not necessarily have 16x time more memory than 4KB ones.
- */
-#define SK_MEM_QUANTUM 4096
-#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
#define SK_MEM_SEND 0
#define SK_MEM_RECV 1
-/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
+/* sysctl_mem values are in pages */
static inline long sk_prot_mem_limits(const struct sock *sk, int index)
{
- long val = sk->sk_prot->sysctl_mem[index];
-
-#if PAGE_SIZE > SK_MEM_QUANTUM
- val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
-#elif PAGE_SIZE < SK_MEM_QUANTUM
- val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT;
-#endif
- return val;
+ return READ_ONCE(sk->sk_prot->sysctl_mem[index]);
}
static inline int sk_mem_pages(int amt)
{
- return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
+ return (amt + PAGE_SIZE - 1) >> PAGE_SHIFT;
}
static inline bool sk_has_account(struct sock *sk)
@@ -1543,19 +1622,23 @@ static inline bool sk_has_account(struct sock *sk)
static inline bool sk_wmem_schedule(struct sock *sk, int size)
{
+ int delta;
+
if (!sk_has_account(sk))
return true;
- return size <= sk->sk_forward_alloc ||
- __sk_mem_schedule(sk, size, SK_MEM_SEND);
+ delta = size - sk->sk_forward_alloc;
+ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND);
}
static inline bool
sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
{
+ int delta;
+
if (!sk_has_account(sk))
return true;
- return size <= sk->sk_forward_alloc ||
- __sk_mem_schedule(sk, size, SK_MEM_RECV) ||
+ delta = size - sk->sk_forward_alloc;
+ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
skb_pfmemalloc(skb);
}
@@ -1581,7 +1664,7 @@ static inline void sk_mem_reclaim(struct sock *sk)
reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
- if (reclaimable >= SK_MEM_QUANTUM)
+ if (reclaimable >= (int)PAGE_SIZE)
__sk_mem_reclaim(sk, reclaimable);
}
@@ -1591,19 +1674,6 @@ static inline void sk_mem_reclaim_final(struct sock *sk)
sk_mem_reclaim(sk);
}
-static inline void sk_mem_reclaim_partial(struct sock *sk)
-{
- int reclaimable;
-
- if (!sk_has_account(sk))
- return;
-
- reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
-
- if (reclaimable > SK_MEM_QUANTUM)
- __sk_mem_reclaim(sk, reclaimable - 1);
-}
-
static inline void sk_mem_charge(struct sock *sk, int size)
{
if (!sk_has_account(sk))
@@ -1611,29 +1681,12 @@ static inline void sk_mem_charge(struct sock *sk, int size)
sk->sk_forward_alloc -= size;
}
-/* the following macros control memory reclaiming in sk_mem_uncharge()
- */
-#define SK_RECLAIM_THRESHOLD (1 << 21)
-#define SK_RECLAIM_CHUNK (1 << 20)
-
static inline void sk_mem_uncharge(struct sock *sk, int size)
{
- int reclaimable;
-
if (!sk_has_account(sk))
return;
sk->sk_forward_alloc += size;
- reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
-
- /* Avoid a possible overflow.
- * TCP send queues can make this happen, if sk_mem_reclaim()
- * is not called and more than 2 GBytes are released at once.
- *
- * If we reach 2 MBytes, reclaim 1 MBytes right now, there is
- * no need to hold that much forward allocation anyway.
- */
- if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
- __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK);
+ sk_mem_reclaim(sk);
}
/*
@@ -1730,6 +1783,11 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
}
}
+void sockopt_lock_sock(struct sock *sk);
+void sockopt_release_sock(struct sock *sk);
+bool sockopt_ns_capable(struct user_namespace *ns, int cap);
+bool sockopt_capable(int cap);
+
/* Used by processes to "lock" a socket state, so that
* interrupts and bottom half handlers won't change it
* from under us. It essentially blocks any incoming
@@ -1804,23 +1862,40 @@ void sock_pfree(struct sk_buff *skb);
#define sock_edemux sock_efree
#endif
+int sk_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen);
int sock_setsockopt(struct socket *sock, int level, int op,
sockptr_t optval, unsigned int optlen);
+int sk_getsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, sockptr_t optlen);
int sock_getsockopt(struct socket *sock, int level, int op,
char __user *optval, int __user *optlen);
int sock_gettstamp(struct socket *sock, void __user *userstamp,
bool timeval, bool time32);
-struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
- int noblock, int *errcode);
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
unsigned long data_len, int noblock,
int *errcode, int max_page_order);
+
+static inline struct sk_buff *sock_alloc_send_skb(struct sock *sk,
+ unsigned long size,
+ int noblock, int *errcode)
+{
+ return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
+}
+
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority);
void sock_kfree_s(struct sock *sk, void *mem, int size);
void sock_kzfree_s(struct sock *sk, void *mem, int size);
void sk_send_sigurg(struct sock *sk);
+static inline void sock_replace_proto(struct sock *sk, struct proto *proto)
+{
+ if (sk->sk_socket)
+ clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
+ WRITE_ONCE(sk->sk_prot, proto);
+}
+
struct sockcm_cookie {
u64 transmit_time;
u32 mark;
@@ -2041,7 +2116,7 @@ static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
static inline u32 net_tx_rndhash(void)
{
- u32 v = prandom_u32();
+ u32 v = get_random_u32();
return v ?: 1;
}
@@ -2054,7 +2129,7 @@ static inline void sk_set_txhash(struct sock *sk)
static inline bool sk_rethink_txhash(struct sock *sk)
{
- if (sk->sk_txhash) {
+ if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) {
sk_set_txhash(sk);
return true;
}
@@ -2217,9 +2292,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
if (err)
return err;
- skb->len += copy;
- skb->data_len += copy;
- skb->truesize += copy;
+ skb_len_add(skb, copy);
sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
return 0;
@@ -2378,7 +2451,14 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
void (*destructor)(struct sock *sk,
struct sk_buff *skb));
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+
+int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason);
+
+static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ return sock_queue_rcv_skb_reason(sk, skb, NULL);
+}
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
@@ -2512,7 +2592,7 @@ static inline gfp_t gfp_any(void)
static inline gfp_t gfp_memcg_charge(void)
{
- return in_softirq() ? GFP_NOWAIT : GFP_KERNEL;
+ return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
}
static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
@@ -2629,20 +2709,21 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
__sock_recv_wifi_status(msg, sk, skb);
}
-void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
- struct sk_buff *skb);
+void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb);
#define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC)
-static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
- struct sk_buff *skb)
+static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb)
{
-#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \
- (1UL << SOCK_RCVTSTAMP))
+#define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \
+ (1UL << SOCK_RCVTSTAMP) | \
+ (1UL << SOCK_RCVMARK))
#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
SOF_TIMESTAMPING_RAW_HARDWARE)
- if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
- __sock_recv_ts_and_drops(msg, sk, skb);
+ if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY)
+ __sock_recv_cmsgs(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
sock_write_timestamp(sk, skb->tstamp);
else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
@@ -2667,7 +2748,7 @@ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags,
__sock_tx_timestamp(tsflags, tx_flags);
if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey &&
tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
- *tskey = sk->sk_tskey++;
+ *tskey = atomic_inc_return(&sk->sk_tskey) - 1;
}
if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
*tx_flags |= SKBTX_WIFI_STATUS;
@@ -2704,18 +2785,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
-static inline
-struct net *sock_net(const struct sock *sk)
-{
- return read_pnet(&sk->sk_net);
-}
-
-static inline
-void sock_net_set(struct sock *sk, struct net *net)
-{
- write_pnet(&sk->sk_net, net);
-}
-
static inline bool
skb_sk_is_prefetched(struct sk_buff *skb)
{
@@ -2832,18 +2901,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_wmem ? */
if (proto->sysctl_wmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
- return *proto->sysctl_wmem;
+ return READ_ONCE(*proto->sysctl_wmem);
}
static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_rmem ? */
if (proto->sysctl_rmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
- return *proto->sysctl_rmem;
+ return READ_ONCE(*proto->sysctl_rmem);
}
/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
@@ -2864,13 +2933,14 @@ static inline void sk_pacing_shift_update(struct sock *sk, int val)
*/
static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
{
+ int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
int mdif;
- if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)
+ if (!bound_dev_if || bound_dev_if == dif)
return true;
mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif);
- if (mdif && mdif == sk->sk_bound_dev_if)
+ if (mdif && mdif == bound_dev_if)
return true;
return false;