aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--include/linux/tcp.h4
-rw-r--r--include/net/inet_timewait_sock.h2
-rw-r--r--include/net/request_sock.h7
-rw-r--r--include/net/sock.h41
-rw-r--r--net/core/sock.c5
-rw-r--r--net/ipv4/inet_hashtables.c2
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_minisocks.c18
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/ipv6/syncookies.c4
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/ipv6/udp.c11
16 files changed, 72 insertions, 42 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index e442e6e9a365..86a7edaa6797 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -356,8 +356,8 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk)
struct tcp_timewait_sock {
struct inet_timewait_sock tw_sk;
- u32 tw_rcv_nxt;
- u32 tw_snd_nxt;
+#define tw_rcv_nxt tw_sk.__tw_common.skc_tw_rcv_nxt
+#define tw_snd_nxt tw_sk.__tw_common.skc_tw_snd_nxt
u32 tw_rcv_wnd;
u32 tw_ts_offset;
u32 tw_ts_recent;
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 186f3a1e1b1f..e581fc69129d 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -70,6 +70,7 @@ struct inet_timewait_sock {
#define tw_dport __tw_common.skc_dport
#define tw_num __tw_common.skc_num
#define tw_cookie __tw_common.skc_cookie
+#define tw_dr __tw_common.skc_tw_dr
int tw_timeout;
volatile unsigned char tw_substate;
@@ -88,7 +89,6 @@ struct inet_timewait_sock {
kmemcheck_bitfield_end(flags);
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
- struct inet_timewait_death_row *tw_dr;
};
#define tw_tclass tw_tos
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 95ab5d7aab96..2e73748956d5 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -50,16 +50,15 @@ struct request_sock {
struct sock_common __req_common;
#define rsk_refcnt __req_common.skc_refcnt
#define rsk_hash __req_common.skc_hash
+#define rsk_listener __req_common.skc_listener
+#define rsk_window_clamp __req_common.skc_window_clamp
+#define rsk_rcv_wnd __req_common.skc_rcv_wnd
struct request_sock *dl_next;
- struct sock *rsk_listener;
u16 mss;
u8 num_retrans; /* number of retransmits */
u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */
u8 num_timeout:7; /* number of timeouts */
- /* The following two fields can be easily recomputed I think -AK */
- u32 window_clamp; /* window clamp at creation time */
- u32 rcv_wnd; /* rcv_wnd offered first time */
u32 ts_recent;
struct timer_list rsk_timer;
const struct request_sock_ops *rsk_ops;
diff --git a/include/net/sock.h b/include/net/sock.h
index 9322cafd191b..64a75458d22c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -150,6 +150,10 @@ typedef __u64 __bitwise __addrpair;
* @skc_node: main hash linkage for various protocol lookup tables
* @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
* @skc_tx_queue_mapping: tx queue number for this connection
+ * @skc_flags: place holder for sk_flags
+ * %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
+ * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
+ * @skc_incoming_cpu: record/match cpu processing incoming packets
* @skc_refcnt: reference count
*
* This is the minimal network layer representation of sockets, the header
@@ -200,6 +204,16 @@ struct sock_common {
atomic64_t skc_cookie;
+ /* following fields are padding to force
+ * offset(struct sock, sk_refcnt) == 128 on 64bit arches
+ * assuming IPV6 is enabled. We use this padding differently
+ * for different kind of 'sockets'
+ */
+ union {
+ unsigned long skc_flags;
+ struct sock *skc_listener; /* request_sock */
+ struct inet_timewait_death_row *skc_tw_dr; /* inet_timewait_sock */
+ };
/*
* fields between dontcopy_begin/dontcopy_end
* are not copied in sock_copy()
@@ -212,9 +226,20 @@ struct sock_common {
struct hlist_nulls_node skc_nulls_node;
};
int skc_tx_queue_mapping;
+ union {
+ int skc_incoming_cpu;
+ u32 skc_rcv_wnd;
+ u32 skc_tw_rcv_nxt; /* struct tcp_timewait_sock */
+ };
+
atomic_t skc_refcnt;
/* private: */
int skc_dontcopy_end[0];
+ union {
+ u32 skc_rxhash;
+ u32 skc_window_clamp;
+ u32 skc_tw_snd_nxt; /* struct tcp_timewait_sock */
+ };
/* public: */
};
@@ -243,8 +268,6 @@ struct cg_proto;
* @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
* @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
* @sk_sndbuf: size of send buffer in bytes
- * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
- * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
* @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
* @sk_no_check_rx: allow zero checksum in RX packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
@@ -273,8 +296,6 @@ struct cg_proto;
* @sk_rcvlowat: %SO_RCVLOWAT setting
* @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting
- * @sk_rxhash: flow hash received from netif layer
- * @sk_incoming_cpu: record cpu processing incoming packets
* @sk_txhash: computed flow hash for use on transmit
* @sk_filter: socket filtering instructions
* @sk_timer: sock cleanup timer
@@ -331,6 +352,9 @@ struct sock {
#define sk_v6_daddr __sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
#define sk_cookie __sk_common.skc_cookie
+#define sk_incoming_cpu __sk_common.skc_incoming_cpu
+#define sk_flags __sk_common.skc_flags
+#define sk_rxhash __sk_common.skc_rxhash
socket_lock_t sk_lock;
struct sk_buff_head sk_receive_queue;
@@ -350,14 +374,6 @@ struct sock {
} sk_backlog;
#define sk_rmem_alloc sk_backlog.rmem_alloc
int sk_forward_alloc;
-#ifdef CONFIG_RPS
- __u32 sk_rxhash;
-#endif
- u16 sk_incoming_cpu;
- /* 16bit hole
- * Warned : sk_incoming_cpu can be set from softirq,
- * Do not use this hole without fully understanding possible issues.
- */
__u32 sk_txhash;
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -373,7 +389,6 @@ struct sock {
#ifdef CONFIG_XFRM
struct xfrm_policy *sk_policy[2];
#endif
- unsigned long sk_flags;
struct dst_entry *sk_rx_dst;
struct dst_entry __rcu *sk_dst_cache;
spinlock_t sk_dst_lock;
diff --git a/net/core/sock.c b/net/core/sock.c
index 33957776cc1a..dcc7d62654d5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -988,6 +988,10 @@ set_rcvbuf:
sk->sk_max_pacing_rate);
break;
+ case SO_INCOMING_CPU:
+ sk->sk_incoming_cpu = val;
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -2379,6 +2383,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_max_pacing_rate = ~0U;
sk->sk_pacing_rate = ~0U;
+ sk->sk_incoming_cpu = -1;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index bed8886a4b6c..08643a3616af 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -185,6 +185,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score += 4;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
}
return score;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 2dbb11331f6c..4c0892badb8b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -382,10 +382,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
}
/* Try to redo what tcp_v4_send_synack did. */
- req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
tcp_select_initial_window(tcp_full_space(sk), req->mss,
- &req->rcv_wnd, &req->window_clamp,
+ &req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ddadb318e850..3b35c3f4d268 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6022,7 +6022,7 @@ static void tcp_openreq_init(struct request_sock *req,
{
struct inet_request_sock *ireq = inet_rsk(req);
- req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
+ req->rsk_rcv_wnd = 0; /* So that tcp_send_synack() knows! */
req->cookie_ts = 0;
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 34310748a365..ddb198392c7f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -803,7 +803,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
*/
tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
- tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
+ tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
tcp_time_stamp,
req->ts_recent,
0,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1079e6ad77fe..41828bdc5d32 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -381,18 +381,18 @@ void tcp_openreq_init_rwin(struct request_sock *req,
window_clamp = READ_ONCE(tp->window_clamp);
/* Set this up on the first call only */
- req->window_clamp = window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+ req->rsk_window_clamp = window_clamp ? : dst_metric(dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */
if (sk_listener->sk_userlocks & SOCK_RCVBUF_LOCK &&
- (req->window_clamp > full_space || req->window_clamp == 0))
- req->window_clamp = full_space;
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
/* tcp_full_space because it is guaranteed to be the first packet */
tcp_select_initial_window(full_space,
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
- &req->rcv_wnd,
- &req->window_clamp,
+ &req->rsk_rcv_wnd,
+ &req->rsk_window_clamp,
ireq->wscale_ok,
&rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
@@ -512,9 +512,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
if (sysctl_tcp_fack)
tcp_enable_fack(newtp);
}
- newtp->window_clamp = req->window_clamp;
- newtp->rcv_ssthresh = req->rcv_wnd;
- newtp->rcv_wnd = req->rcv_wnd;
+ newtp->window_clamp = req->rsk_window_clamp;
+ newtp->rcv_ssthresh = req->rsk_rcv_wnd;
+ newtp->rcv_wnd = req->rsk_rcv_wnd;
newtp->rx_opt.wscale_ok = ireq->wscale_ok;
if (newtp->rx_opt.wscale_ok) {
newtp->rx_opt.snd_wscale = ireq->snd_wscale;
@@ -707,7 +707,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* RFC793: "first check sequence number". */
if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) {
+ tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) {
/* Out of window: send ACK and drop. */
if (!(flg & TCP_FLAG_RST))
req->rsk_ops->send_ack(sk, skb, req);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 55ed3266b05f..6e79fcb0addb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3023,7 +3023,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
- th->window = htons(min(req->rcv_wnd, 65535U));
+ th->window = htons(min(req->rsk_rcv_wnd, 65535U));
tcp_options_write((__be32 *)(th + 1), NULL, &opts);
th->doff = (tcp_header_size >> 2);
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e1fc129099ea..24ec14f9825c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -375,7 +375,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score += 4;
}
-
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
return score;
}
@@ -419,6 +420,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
score += 4;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
+
return score;
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 6ac8dad0138a..21ace5a2bf7c 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -114,6 +114,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score++;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
}
return score;
}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index f610b5310b17..bb8f2fa1c7fb 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -235,9 +235,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out_free;
}
- req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
+ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
tcp_select_initial_window(tcp_full_space(sk), req->mss,
- &req->rcv_wnd, &req->window_clamp,
+ &req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 33334f0c217d..2887c8474b65 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -931,7 +931,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
*/
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
- tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
+ tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
0, 0);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0aba654f5b91..01bcb49619ee 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -182,10 +182,12 @@ static inline int compute_score(struct sock *sk, struct net *net,
score++;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
+
return score;
}
-#define SCORE2_MAX (1 + 1 + 1)
static inline int compute_score2(struct sock *sk, struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr,
@@ -223,6 +225,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
score++;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
+
return score;
}
@@ -251,8 +256,7 @@ begin:
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
matches = 1;
- } else if (score == SCORE2_MAX)
- goto exact_match;
+ }
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -269,7 +273,6 @@ begin:
goto begin;
if (result) {
-exact_match:
if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score2(result, net, saddr, sport,