1 files changed, 747 insertions, 584 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 69d8c38ccd39..19c449f62672 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -105,6 +105,7 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
 #define FLAG_NONHEAD_RETRANS_ACKED	0x1000 /* Non-head rexmitted data was ACKed */
+#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -120,8 +121,7 @@ int sysctl_tcp_abc __read_mostly;
 /* Adapt the MSS value used to make delayed ack decision to the
  * real world.
  */
-static void tcp_measure_rcv_mss(struct sock *sk,
-				const struct sk_buff *skb)
+static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const unsigned int lss = icsk->icsk_ack.last_seg_size;
@@ -132,7 +132,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 	/* skb->len may jitter because of SACKs, even if peer
 	 * sends good full-sized frames.
 	 */
-	len = skb_shinfo(skb)->gso_size ?: skb->len;
+	len = skb_shinfo(skb)->gso_size ? : skb->len;
 	if (len >= icsk->icsk_ack.rcv_mss) {
 		icsk->icsk_ack.rcv_mss = len;
 	} else {
@@ -172,8 +172,8 @@ static void tcp_incr_quickack(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
 
-	if (quickacks==0)
-		quickacks=2;
+	if (quickacks == 0)
+		quickacks = 2;
 	if (quickacks > icsk->icsk_ack.quick)
 		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
 }
@@ -198,7 +198,7 @@ static inline int tcp_in_quickack_mode(const struct sock *sk)
 
 static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
 {
-	if (tp->ecn_flags&TCP_ECN_OK)
+	if (tp->ecn_flags & TCP_ECN_OK)
 		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
 }
 
@@ -215,7 +215,7 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
 
 static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	if (tp->ecn_flags&TCP_ECN_OK) {
+	if (tp->ecn_flags & TCP_ECN_OK) {
 		if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		/* Funny extension: if ECT is not set on a segment,
@@ -228,19 +228,19 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
 
 static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
 {
-	if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr))
+	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
 static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
 {
-	if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr))
+	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
 static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
 {
-	if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK))
+	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
 		return 1;
 	return 0;
 }
@@ -289,8 +289,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	/* Optimize this! */
-	int truesize = tcp_win_from_space(skb->truesize)/2;
-	int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
+	int truesize = tcp_win_from_space(skb->truesize) >> 1;
+	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
 
 	while (tp->rcv_ssthresh <= window) {
 		if (truesize <= skb->len)
@@ -302,8 +302,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 	return 0;
 }
 
-static void tcp_grow_window(struct sock *sk,
-			    struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -317,12 +316,13 @@ static void tcp_grow_window(struct sock *sk,
 		 * will fit to rcvbuf in future.
 		 */
 		if (tcp_win_from_space(skb->truesize) <= skb->len)
-			incr = 2*tp->advmss;
+			incr = 2 * tp->advmss;
 		else
 			incr = __tcp_grow_window(sk, skb);
 
 		if (incr) {
-			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
+			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
+					       tp->window_clamp);
 			inet_csk(sk)->icsk_ack.quick |= 1;
 		}
 	}
@@ -397,10 +397,9 @@ static void tcp_clamp_window(struct sock *sk)
 				    sysctl_tcp_rmem[2]);
 	}
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
-		tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
+		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
 }
 
-
 /* Initialize RCV_MSS value.
  * RCV_MSS is an our guess about MSS used by the peer.
  * We haven't any direct information about the MSS.
@@ -413,7 +412,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
 
-	hint = min(hint, tp->rcv_wnd/2);
+	hint = min(hint, tp->rcv_wnd / 2);
 	hint = min(hint, TCP_MIN_RCVMSS);
 	hint = max(hint, TCP_MIN_MSS);
 
@@ -470,16 +469,15 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
 		goto new_measure;
 	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
 		return;
-	tcp_rcv_rtt_update(tp,
-			   jiffies - tp->rcv_rtt_est.time,
-			   1);
+	tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);
 
 new_measure:
 	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
 	tp->rcv_rtt_est.time = tcp_time_stamp;
 }
 
-static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb)
+static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
+					  const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	if (tp->rx_opt.rcv_tsecr &&
@@ -502,8 +500,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		goto new_measure;
 
 	time = tcp_time_stamp - tp->rcvq_space.time;
-	if (time < (tp->rcv_rtt_est.rtt >> 3) ||
-	    tp->rcv_rtt_est.rtt == 0)
+	if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
 		return;
 
 	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
@@ -579,7 +576,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 	} else {
 		int m = now - icsk->icsk_ack.lrcvtime;
 
-		if (m <= TCP_ATO_MIN/2) {
+		if (m <= TCP_ATO_MIN / 2) {
 			/* The fastest case is the first. */
 			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
 		} else if (m < icsk->icsk_ack.ato) {
@@ -591,7 +588,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 			 * restart window, so that we send ACKs quickly.
 			 */
 			tcp_incr_quickack(sk);
-			sk_stream_mem_reclaim(sk);
+			sk_mem_reclaim(sk);
 		}
 	}
 	icsk->icsk_ack.lrcvtime = now;
@@ -608,7 +605,7 @@ static u32 tcp_rto_min(struct sock *sk)
 	u32 rto_min = TCP_RTO_MIN;
 
 	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
-		rto_min = dst->metrics[RTAX_RTO_MIN-1];
+		rto_min = dst->metrics[RTAX_RTO_MIN - 1];
 	return rto_min;
 }
 
@@ -671,14 +668,14 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 		}
 		if (after(tp->snd_una, tp->rtt_seq)) {
 			if (tp->mdev_max < tp->rttvar)
-				tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
+				tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
 			tp->rtt_seq = tp->snd_nxt;
 			tp->mdev_max = tcp_rto_min(sk);
 		}
 	} else {
 		/* no previous measure. */
-		tp->srtt = m<<3;	/* take the measured time to be rtt */
-		tp->mdev = m<<1;	/* make sure rto = 3*rtt */
+		tp->srtt = m << 3;	/* take the measured time to be rtt */
+		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
 		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
 		tp->rtt_seq = tp->snd_nxt;
 	}
@@ -732,7 +729,7 @@ void tcp_update_metrics(struct sock *sk)
 
 	dst_confirm(dst);
 
-	if (dst && (dst->flags&DST_HOST)) {
+	if (dst && (dst->flags & DST_HOST)) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 		int m;
 
@@ -742,7 +739,7 @@ void tcp_update_metrics(struct sock *sk)
 			 * Reset our results.
 			 */
 			if (!(dst_metric_locked(dst, RTAX_RTT)))
-				dst->metrics[RTAX_RTT-1] = 0;
+				dst->metrics[RTAX_RTT - 1] = 0;
 			return;
 		}
 
@@ -754,9 +751,9 @@ void tcp_update_metrics(struct sock *sk)
 		 */
 		if (!(dst_metric_locked(dst, RTAX_RTT))) {
 			if (m <= 0)
-				dst->metrics[RTAX_RTT-1] = tp->srtt;
+				dst->metrics[RTAX_RTT - 1] = tp->srtt;
 			else
-				dst->metrics[RTAX_RTT-1] -= (m>>3);
+				dst->metrics[RTAX_RTT - 1] -= (m >> 3);
 		}
 
 		if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
@@ -769,7 +766,7 @@ void tcp_update_metrics(struct sock *sk)
 				m = tp->mdev;
 
 			if (m >= dst_metric(dst, RTAX_RTTVAR))
-				dst->metrics[RTAX_RTTVAR-1] = m;
+				dst->metrics[RTAX_RTTVAR - 1] = m;
 			else
 				dst->metrics[RTAX_RTTVAR-1] -=
 					(dst->metrics[RTAX_RTTVAR-1] - m)>>2;
@@ -783,7 +780,7 @@ void tcp_update_metrics(struct sock *sk)
 				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
 			if (!dst_metric_locked(dst, RTAX_CWND) &&
 			    tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
-				dst->metrics[RTAX_CWND-1] = tp->snd_cwnd;
+				dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
 		} else if (tp->snd_cwnd > tp->snd_ssthresh &&
 			   icsk->icsk_ca_state == TCP_CA_Open) {
 			/* Cong. avoidance phase, cwnd is reliable. */
@@ -863,6 +860,9 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
  */
 static void tcp_disable_fack(struct tcp_sock *tp)
 {
+	/* RFC3517 uses different metric in lost marker => reset on change */
+	if (tcp_is_fack(tp))
+		tp->lost_skb_hint = NULL;
 	tp->rx_opt.sack_ok &= ~2;
 }
 
@@ -923,7 +923,7 @@ static void tcp_init_metrics(struct sock *sk)
 	}
 	if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
 		tp->mdev = dst_metric(dst, RTAX_RTTVAR);
-		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
+		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
 	}
 	tcp_set_rto(sk);
 	tcp_bound_rto(sk);
@@ -1112,16 +1112,22 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
  *
  * Search retransmitted skbs from write_queue that were sent when snd_nxt was
  * less than what is now known to be received by the other end (derived from
- * SACK blocks by the caller). Also calculate the lowest snd_nxt among the
- * remaining retransmitted skbs to avoid some costly processing per ACKs.
+ * highest SACK block). Also calculate the lowest snd_nxt among the remaining
+ * retransmitted skbs to avoid some costly processing per ACKs.
  */
-static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
+static void tcp_mark_lost_retrans(struct sock *sk)
 {
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int flag = 0;
 	int cnt = 0;
 	u32 new_low_seq = tp->snd_nxt;
+	u32 received_upto = tcp_highest_sack_seq(tp);
+
+	if (!tcp_is_fack(tp) || !tp->retrans_out ||
+	    !after(received_upto, tp->lost_retrans_low) ||
+	    icsk->icsk_ca_state != TCP_CA_Recovery)
+		return;
 
 	tcp_for_write_queue(skb, sk) {
 		u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
@@ -1149,9 +1155,8 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
 			if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
 				tp->lost_out += tcp_skb_pcount(skb);
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-				flag |= FLAG_DATA_SACKED;
-				NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
 			}
+			NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
 		} else {
 			if (before(ack_seq, new_low_seq))
 				new_low_seq = ack_seq;
@@ -1161,8 +1166,6 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
 
 	if (tp->retrans_out)
 		tp->lost_retrans_low = new_low_seq;
-
-	return flag;
 }
 
 static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
@@ -1230,34 +1233,205 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
+static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
+			   int *reord, int dup_sack, int fack_count)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u8 sacked = TCP_SKB_CB(skb)->sacked;
+	int flag = 0;
+
+	/* Account D-SACK for retransmitted packet. */
+	if (dup_sack && (sacked & TCPCB_RETRANS)) {
+		if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+			tp->undo_retrans--;
+		if (sacked & TCPCB_SACKED_ACKED)
+			*reord = min(fack_count, *reord);
+	}
+
+	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
+	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+		return flag;
+
+	if (!(sacked & TCPCB_SACKED_ACKED)) {
+		if (sacked & TCPCB_SACKED_RETRANS) {
+			/* If the segment is not tagged as lost,
+			 * we do not clear RETRANS, believing
+			 * that retransmission is still in flight.
+			 */
+			if (sacked & TCPCB_LOST) {
+				TCP_SKB_CB(skb)->sacked &=
+					~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+				tp->lost_out -= tcp_skb_pcount(skb);
+				tp->retrans_out -= tcp_skb_pcount(skb);
+
+				/* clear lost hint */
+				tp->retransmit_skb_hint = NULL;
+			}
+		} else {
+			if (!(sacked & TCPCB_RETRANS)) {
+				/* New sack for not retransmitted frame,
+				 * which was in hole. It is reordering.
+				 */
+				if (before(TCP_SKB_CB(skb)->seq,
+					   tcp_highest_sack_seq(tp)))
+					*reord = min(fack_count, *reord);
+
+				/* SACK enhanced F-RTO (RFC4138; Appendix B) */
+				if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
+					flag |= FLAG_ONLY_ORIG_SACKED;
+			}
+
+			if (sacked & TCPCB_LOST) {
+				TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+				tp->lost_out -= tcp_skb_pcount(skb);
+
+				/* clear lost hint */
+				tp->retransmit_skb_hint = NULL;
+			}
+		}
+
+		TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
+		flag |= FLAG_DATA_SACKED;
+		tp->sacked_out += tcp_skb_pcount(skb);
+
+		fack_count += tcp_skb_pcount(skb);
+
+		/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
+		if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
+		    before(TCP_SKB_CB(skb)->seq,
+			   TCP_SKB_CB(tp->lost_skb_hint)->seq))
+			tp->lost_cnt_hint += tcp_skb_pcount(skb);
+
+		if (fack_count > tp->fackets_out)
+			tp->fackets_out = fack_count;
+
+		if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+			tcp_advance_highest_sack(sk, skb);
+	}
+
+	/* D-SACK. We can detect redundant retransmission in S|R and plain R
+	 * frames and clear it. undo_retrans is decreased above, L|R frames
+	 * are accounted above as well.
+	 */
+	if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) {
+		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+		tp->retrans_out -= tcp_skb_pcount(skb);
+		tp->retransmit_skb_hint = NULL;
+	}
+
+	return flag;
+}
+
+static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
+					struct tcp_sack_block *next_dup,
+					u32 start_seq, u32 end_seq,
+					int dup_sack_in, int *fack_count,
+					int *reord, int *flag)
+{
+	tcp_for_write_queue_from(skb, sk) {
+		int in_sack = 0;
+		int dup_sack = dup_sack_in;
+
+		if (skb == tcp_send_head(sk))
+			break;
+
+		/* queue is in-order => we can short-circuit the walk early */
+		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
+			break;
+
+		if ((next_dup != NULL) &&
+		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
+			in_sack = tcp_match_skb_to_sack(sk, skb,
+							next_dup->start_seq,
+							next_dup->end_seq);
+			if (in_sack > 0)
+				dup_sack = 1;
+		}
+
+		if (in_sack <= 0)
+			in_sack = tcp_match_skb_to_sack(sk, skb, start_seq,
+							end_seq);
+		if (unlikely(in_sack < 0))
+			break;
+
+		if (in_sack)
+			*flag |= tcp_sacktag_one(skb, sk, reord, dup_sack,
+						 *fack_count);
+
+		*fack_count += tcp_skb_pcount(skb);
+	}
+	return skb;
+}
+
+/* Avoid all extra work that is being done by sacktag while walking in
+ * a normal way
+ */
+static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
+					u32 skip_to_seq)
+{
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
+
+		if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
+			break;
+	}
+	return skb;
+}
+
+static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
+						struct sock *sk,
+						struct tcp_sack_block *next_dup,
+						u32 skip_to_seq,
+						int *fack_count, int *reord,
+						int *flag)
+{
+	if (next_dup == NULL)
+		return skb;
+
+	if (before(next_dup->start_seq, skip_to_seq)) {
+		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
+		tcp_sacktag_walk(skb, sk, NULL,
+				 next_dup->start_seq, next_dup->end_seq,
+				 1, fack_count, reord, flag);
+	}
+
+	return skb;
+}
+
+static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
+{
+	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
+}
+
 static int
-tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
+tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
+			u32 prior_snd_una)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned char *ptr = (skb_transport_header(ack_skb) +
 			      TCP_SKB_CB(ack_skb)->sacked);
-	struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
-	struct sk_buff *cached_skb;
-	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
+	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
+	struct tcp_sack_block sp[4];
+	struct tcp_sack_block *cache;
+	struct sk_buff *skb;
+	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3;
+	int used_sacks;
 	int reord = tp->packets_out;
-	int prior_fackets;
-	u32 highest_sack_end_seq = tp->lost_retrans_low;
 	int flag = 0;
 	int found_dup_sack = 0;
-	int cached_fack_count;
-	int i;
+	int fack_count;
+	int i, j;
 	int first_sack_index;
-	int force_one_sack;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
 			tp->fackets_out = 0;
-		tp->highest_sack = tp->snd_una;
+		tcp_highest_sack_reset(sk);
 	}
-	prior_fackets = tp->fackets_out;
 
-	found_dup_sack = tcp_check_dsack(tp, ack_skb, sp,
+	found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
 					 num_sacks, prior_snd_una);
 	if (found_dup_sack)
 		flag |= FLAG_DSACKING_ACK;
@@ -1269,75 +1443,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
 		return 0;
 
-	/* SACK fastpath:
-	 * if the only SACK change is the increase of the end_seq of
-	 * the first block then only apply that SACK block
-	 * and use retrans queue hinting otherwise slowpath */
-	force_one_sack = 1;
-	for (i = 0; i < num_sacks; i++) {
-		__be32 start_seq = sp[i].start_seq;
-		__be32 end_seq = sp[i].end_seq;
-
-		if (i == 0) {
-			if (tp->recv_sack_cache[i].start_seq != start_seq)
-				force_one_sack = 0;
-		} else {
-			if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
-			    (tp->recv_sack_cache[i].end_seq != end_seq))
-				force_one_sack = 0;
-		}
-		tp->recv_sack_cache[i].start_seq = start_seq;
-		tp->recv_sack_cache[i].end_seq = end_seq;
-	}
-	/* Clear the rest of the cache sack blocks so they won't match mistakenly. */
-	for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) {
-		tp->recv_sack_cache[i].start_seq = 0;
-		tp->recv_sack_cache[i].end_seq = 0;
-	}
+	if (!tp->packets_out)
+		goto out;
 
+	used_sacks = 0;
 	first_sack_index = 0;
-	if (force_one_sack)
-		num_sacks = 1;
-	else {
-		int j;
-		tp->fastpath_skb_hint = NULL;
-
-		/* order SACK blocks to allow in order walk of the retrans queue */
-		for (i = num_sacks-1; i > 0; i--) {
-			for (j = 0; j < i; j++){
-				if (after(ntohl(sp[j].start_seq),
-					  ntohl(sp[j+1].start_seq))){
-					struct tcp_sack_block_wire tmp;
-
-					tmp = sp[j];
-					sp[j] = sp[j+1];
-					sp[j+1] = tmp;
-
-					/* Track where the first SACK block goes to */
-					if (j == first_sack_index)
-						first_sack_index = j+1;
-				}
-
-			}
-		}
-	}
-
-	/* Use SACK fastpath hint if valid */
-	cached_skb = tp->fastpath_skb_hint;
-	cached_fack_count = tp->fastpath_cnt_hint;
-	if (!cached_skb) {
-		cached_skb = tcp_write_queue_head(sk);
-		cached_fack_count = 0;
-	}
+	for (i = 0; i < num_sacks; i++) {
+		int dup_sack = !i && found_dup_sack;
 
-	for (i=0; i<num_sacks; i++, sp++) {
-		struct sk_buff *skb;
-		__u32 start_seq = ntohl(sp->start_seq);
-		__u32 end_seq = ntohl(sp->end_seq);
-		int fack_count;
-		int dup_sack = (found_dup_sack && (i == first_sack_index));
+		sp[used_sacks].start_seq = ntohl(get_unaligned(&sp_wire[i].start_seq));
+		sp[used_sacks].end_seq = ntohl(get_unaligned(&sp_wire[i].end_seq));
 
-		if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) {
+		if (!tcp_is_sackblock_valid(tp, dup_sack,
+					    sp[used_sacks].start_seq,
+					    sp[used_sacks].end_seq)) {
 			if (dup_sack) {
 				if (!tp->undo_marker)
 					NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
@@ -1346,157 +1465,152 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			} else {
 				/* Don't count olds caused by ACK reordering */
 				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
-				    !after(end_seq, tp->snd_una))
+				    !after(sp[used_sacks].end_seq, tp->snd_una))
 					continue;
 				NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
 			}
+			if (i == 0)
+				first_sack_index = -1;
 			continue;
 		}
 
-		skb = cached_skb;
-		fack_count = cached_fack_count;
+		/* Ignore very old stuff early */
+		if (!after(sp[used_sacks].end_seq, prior_snd_una))
+			continue;
 
-		/* Event "B" in the comment above. */
-		if (after(end_seq, tp->high_seq))
-			flag |= FLAG_DATA_LOST;
+		used_sacks++;
+	}
 
-		tcp_for_write_queue_from(skb, sk) {
-			int in_sack;
-			u8 sacked;
+	/* order SACK blocks to allow in order walk of the retrans queue */
+	for (i = used_sacks - 1; i > 0; i--) {
+		for (j = 0; j < i; j++) {
+			if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
+				struct tcp_sack_block tmp;
 
-			if (skb == tcp_send_head(sk))
-				break;
+				tmp = sp[j];
+				sp[j] = sp[j + 1];
+				sp[j + 1] = tmp;
 
-			cached_skb = skb;
-			cached_fack_count = fack_count;
-			if (i == first_sack_index) {
-				tp->fastpath_skb_hint = skb;
-				tp->fastpath_cnt_hint = fack_count;
+				/* Track where the first SACK block goes to */
+				if (j == first_sack_index)
+					first_sack_index = j + 1;
 			}
+		}
+	}
 
-			/* The retransmission queue is always in order, so
-			 * we can short-circuit the walk early.
-			 */
-			if (!before(TCP_SKB_CB(skb)->seq, end_seq))
-				break;
+	skb = tcp_write_queue_head(sk);
+	fack_count = 0;
+	i = 0;
 
-			in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq);
-			if (in_sack < 0)
-				break;
+	if (!tp->sacked_out) {
+		/* It's already past, so skip checking against it */
+		cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
+	} else {
+		cache = tp->recv_sack_cache;
+		/* Skip empty blocks in at head of the cache */
+		while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
+		       !cache->end_seq)
+			cache++;
+	}
 
-			fack_count += tcp_skb_pcount(skb);
-
-			sacked = TCP_SKB_CB(skb)->sacked;
-
-			/* Account D-SACK for retransmitted packet. */
-			if ((dup_sack && in_sack) &&
-			    (sacked & TCPCB_RETRANS) &&
-			    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
-				tp->undo_retrans--;
-
-			/* The frame is ACKed. */
-			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
-				if (sacked&TCPCB_RETRANS) {
-					if ((dup_sack && in_sack) &&
-					    (sacked&TCPCB_SACKED_ACKED))
-						reord = min(fack_count, reord);
-				} else {
-					/* If it was in a hole, we detected reordering. */
-					if (fack_count < prior_fackets &&
-					    !(sacked&TCPCB_SACKED_ACKED))
-						reord = min(fack_count, reord);
-				}
+	while (i < used_sacks) {
+		u32 start_seq = sp[i].start_seq;
+		u32 end_seq = sp[i].end_seq;
+		int dup_sack = (found_dup_sack && (i == first_sack_index));
+		struct tcp_sack_block *next_dup = NULL;
+
+		if (found_dup_sack && ((i + 1) == first_sack_index))
+			next_dup = &sp[i + 1];
+
+		/* Event "B" in the comment above. */
+		if (after(end_seq, tp->high_seq))
+			flag |= FLAG_DATA_LOST;
 
-				/* Nothing to do; acked frame is about to be dropped. */
-				continue;
+		/* Skip too early cached blocks */
+		while (tcp_sack_cache_ok(tp, cache) &&
+		       !before(start_seq, cache->end_seq))
+			cache++;
+
+		/* Can skip some work by looking recv_sack_cache? */
+		if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
+		    after(end_seq, cache->start_seq)) {
+
+			/* Head todo? */
+			if (before(start_seq, cache->start_seq)) {
+				skb = tcp_sacktag_skip(skb, sk, start_seq);
+				skb = tcp_sacktag_walk(skb, sk, next_dup,
+						       start_seq,
+						       cache->start_seq,
+						       dup_sack, &fack_count,
+						       &reord, &flag);
 			}
 
-			if (!in_sack)
-				continue;
+			/* Rest of the block already fully processed? */
+			if (!after(end_seq, cache->end_seq))
+				goto advance_sp;
 
-			if (!(sacked&TCPCB_SACKED_ACKED)) {
-				if (sacked & TCPCB_SACKED_RETRANS) {
-					/* If the segment is not tagged as lost,
-					 * we do not clear RETRANS, believing
-					 * that retransmission is still in flight.
-					 */
-					if (sacked & TCPCB_LOST) {
-						TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
-						tp->lost_out -= tcp_skb_pcount(skb);
-						tp->retrans_out -= tcp_skb_pcount(skb);
+			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
+						       cache->end_seq,
+						       &fack_count, &reord,
+						       &flag);
 
-						/* clear lost hint */
-						tp->retransmit_skb_hint = NULL;
-					}
-				} else {
-					/* New sack for not retransmitted frame,
-					 * which was in hole. It is reordering.
-					 */
-					if (!(sacked & TCPCB_RETRANS) &&
-					    fack_count < prior_fackets)
-						reord = min(fack_count, reord);
+			/* ...tail remains todo... */
+			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
+				/* ...but better entrypoint exists! */
+				skb = tcp_highest_sack(sk);
+				if (skb == NULL)
+					break;
+				fack_count = tp->fackets_out;
+				cache++;
+				goto walk;
+			}
 
-					if (sacked & TCPCB_LOST) {
-						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
-						tp->lost_out -= tcp_skb_pcount(skb);
+			skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
+			/* Check overlap against next cached too (past this one already) */
+			cache++;
+			continue;
+		}
 
-						/* clear lost hint */
-						tp->retransmit_skb_hint = NULL;
-					}
-					/* SACK enhanced F-RTO detection.
-					 * Set flag if and only if non-rexmitted
-					 * segments below frto_highmark are
-					 * SACKed (RFC4138; Appendix B).
-					 * Clearing correct due to in-order walk
-					 */
-					if (after(end_seq, tp->frto_highmark)) {
-						flag &= ~FLAG_ONLY_ORIG_SACKED;
-					} else {
-						if (!(sacked & TCPCB_RETRANS))
-							flag |= FLAG_ONLY_ORIG_SACKED;
-					}
-				}
+		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
+			skb = tcp_highest_sack(sk);
+			if (skb == NULL)
+				break;
+			fack_count = tp->fackets_out;
+		}
+		skb = tcp_sacktag_skip(skb, sk, start_seq);
 
-				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
-				flag |= FLAG_DATA_SACKED;
-				tp->sacked_out += tcp_skb_pcount(skb);
+walk:
+		skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
+				       dup_sack, &fack_count, &reord, &flag);
 
-				if (fack_count > tp->fackets_out)
-					tp->fackets_out = fack_count;
+advance_sp:
+		/* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
+		 * due to in-order walk
+		 */
+		if (after(end_seq, tp->frto_highmark))
+			flag &= ~FLAG_ONLY_ORIG_SACKED;
 
-				if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) {
-					tp->highest_sack = TCP_SKB_CB(skb)->seq;
-					highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq;
-				}
-			} else {
-				if (dup_sack && (sacked&TCPCB_RETRANS))
-					reord = min(fack_count, reord);
-			}
+		i++;
+	}
 
-			/* D-SACK. We can detect redundant retransmission
-			 * in S|R and plain R frames and clear it.
-			 * undo_retrans is decreased above, L|R frames
-			 * are accounted above as well.
-			 */
-			if (dup_sack &&
-			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
-				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-				tp->retrans_out -= tcp_skb_pcount(skb);
-				tp->retransmit_skb_hint = NULL;
-			}
-		}
+	/* Clear the head of the cache sack blocks so we can skip it next time */
+	for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
+		tp->recv_sack_cache[i].start_seq = 0;
+		tp->recv_sack_cache[i].end_seq = 0;
 	}
+	for (j = 0; j < used_sacks; j++)
+		tp->recv_sack_cache[i++] = sp[j];
 
-	if (tp->retrans_out &&
-	    after(highest_sack_end_seq, tp->lost_retrans_low) &&
-	    icsk->icsk_ca_state == TCP_CA_Recovery)
-		flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);
+	tcp_mark_lost_retrans(sk);
 
 	tcp_verify_left_out(tp);
 
-	if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
+	if ((reord < tp->fackets_out) &&
+	    ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
 	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
-		tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
+		tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+
+out:
 
 #if FASTRETRANS_DEBUG > 0
 	BUG_TRAP((int)tp->sacked_out >= 0);
@@ -1543,10 +1657,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked)
 
 	if (acked > 0) {
 		/* One ACK acked hole. The rest eat duplicate ACKs. */
-		if (acked-1 >= tp->sacked_out)
+		if (acked - 1 >= tp->sacked_out)
 			tp->sacked_out = 0;
 		else
-			tp->sacked_out -= acked-1;
+			tp->sacked_out -= acked - 1;
 	}
 	tcp_check_reno_reordering(sk, acked);
 	tcp_verify_left_out(tp);
@@ -1580,10 +1694,10 @@ int tcp_use_frto(struct sock *sk)
 	tcp_for_write_queue_from(skb, sk) {
 		if (skb == tcp_send_head(sk))
 			break;
-		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
 			return 0;
 		/* Short-circuit when first non-SACKed skb has been checked */
-		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED))
+		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 			break;
 	}
 	return 1;
@@ -1652,6 +1766,9 @@ void tcp_enter_frto(struct sock *sk)
 	}
 	tcp_verify_left_out(tp);
 
+	/* Too bad if TCP was application limited */
+	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
+
 	/* Earlier loss recovery underway (see RFC4138; Appendix B).
 	 * The last condition is necessary at least in tp->frto_counter case.
 	 */
@@ -1684,11 +1801,13 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tcp_for_write_queue(skb, sk) {
 		if (skb == tcp_send_head(sk))
 			break;
+
+		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		/*
 		 * Count the retransmission made on RTO correctly (only when
 		 * waiting for the first ACK and did not get it)...
 		 */
-		if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) {
+		if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
 			/* For some reason this R-bit might get cleared? */
 			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
 				tp->retrans_out += tcp_skb_pcount(skb);
@@ -1697,11 +1816,11 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 		} else {
 			if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
 				tp->undo_marker = 0;
-			TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 		}
 
 		/* Don't lost mark skbs that were fwd transmitted after RTO */
-		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) &&
+		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) &&
 		    !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
@@ -1716,7 +1835,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tp->bytes_acked = 0;
 
 	tp->reordering = min_t(unsigned int, tp->reordering,
-					     sysctl_tcp_reordering);
+			       sysctl_tcp_reordering);
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->frto_highmark;
 	TCP_ECN_queue_cwr(tp);
@@ -1783,7 +1902,7 @@ void tcp_enter_loss(struct sock *sk, int how)
 		if (skb == tcp_send_head(sk))
 			break;
 
-		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
 			tp->undo_marker = 0;
 		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
@@ -1795,7 +1914,7 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tcp_verify_left_out(tp);
 
 	tp->reordering = min_t(unsigned int, tp->reordering,
-					     sysctl_tcp_reordering);
+			       sysctl_tcp_reordering);
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
@@ -1803,18 +1922,15 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tp->frto_counter = 0;
 }
 
-static int tcp_check_sack_reneging(struct sock *sk)
+/* If ACK arrived pointing to a remembered SACK, it means that our
+ * remembered SACKs do not reflect real state of receiver i.e.
+ * receiver _host_ is heavily congested (or buggy).
+ *
+ * Do processing similar to RTO timeout.
+ */
+static int tcp_check_sack_reneging(struct sock *sk, int flag)
 {
-	struct sk_buff *skb;
-
-	/* If ACK arrived pointing to a remembered SACK,
-	 * it means that our remembered SACKs do not reflect
-	 * real state of receiver i.e.
-	 * receiver _host_ is heavily congested (or buggy).
-	 * Do processing similar to RTO timeout.
-	 */
-	if ((skb = tcp_write_queue_head(sk)) != NULL &&
-	    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+	if (flag & FLAG_SACK_RENEGING) {
 		struct inet_connection_sock *icsk = inet_csk(sk);
 		NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
 
@@ -1830,7 +1946,27 @@ static int tcp_check_sack_reneging(struct sock *sk)
 
 static inline int tcp_fackets_out(struct tcp_sock *tp)
 {
-	return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out;
+	return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
+}
+
+/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
+ * counter when SACK is enabled (without SACK, sacked_out is used for
+ * that purpose).
+ *
+ * Instead, with FACK TCP uses fackets_out that includes both SACKed
+ * segments up to the highest received SACK block so far and holes in
+ * between them.
+ *
+ * With reordering, holes may still be in flight, so RFC3517 recovery
+ * uses pure sacked_out (total number of SACKed segments) even though
+ * it violates the RFC that uses duplicate ACKs, often these are equal
+ * but when e.g. out-of-window ACKs or packet duplication occurs,
+ * they differ. Since neither occurs due to loss, TCP should really
+ * ignore them.
+ */
+static inline int tcp_dupack_heurestics(struct tcp_sock *tp)
+{
+	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
 }
 
 static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
@@ -1953,13 +2089,13 @@ static int tcp_time_to_recover(struct sock *sk)
 		return 1;
 
 	/* Not-A-Trick#2 : Classic rule... */
-	if (tcp_fackets_out(tp) > tp->reordering)
+	if (tcp_dupack_heurestics(tp) > tp->reordering)
 		return 1;
 
 	/* Trick#3 : when we use RFC2988 timer restart, fast
 	 * retransmit can be triggered by timeout of queue head.
 	 */
-	if (tcp_head_timedout(sk))
+	if (tcp_is_fack(tp) && tcp_head_timedout(sk))
 		return 1;
 
 	/* Trick#4: It is still not OK... But will it be useful to delay
@@ -1983,17 +2119,18 @@ static int tcp_time_to_recover(struct sock *sk)
  * retransmitted past LOST markings in the first place? I'm not fully sure
  * about undo and end of connection cases, which can cause R without L?
  */
-static void tcp_verify_retransmit_hint(struct tcp_sock *tp,
-				       struct sk_buff *skb)
+static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	if ((tp->retransmit_skb_hint != NULL) &&
 	    before(TCP_SKB_CB(skb)->seq,
-	    TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
 		tp->retransmit_skb_hint = NULL;
 }
 
-/* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk, int packets)
+/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
+ * is against sacked "cnt", otherwise it's against facked "cnt"
+ */
+static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -2015,8 +2152,13 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 		/* this is not the most efficient way to do this... */
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
-		cnt += tcp_skb_pcount(skb);
-		if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+
+		if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
+		    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
+			cnt += tcp_skb_pcount(skb);
+
+		if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
+		    after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
 			break;
 		if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -2029,17 +2171,22 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 
 /* Account newly detected lost packet(s) */
 
-static void tcp_update_scoreboard(struct sock *sk)
+static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tcp_is_fack(tp)) {
+	if (tcp_is_reno(tp)) {
+		tcp_mark_head_lost(sk, 1, fast_rexmit);
+	} else if (tcp_is_fack(tp)) {
 		int lost = tp->fackets_out - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
-		tcp_mark_head_lost(sk, lost);
+		tcp_mark_head_lost(sk, lost, fast_rexmit);
 	} else {
-		tcp_mark_head_lost(sk, 1);
+		int sacked_upto = tp->sacked_out - tp->reordering;
+		if (sacked_upto < 0)
+			sacked_upto = 0;
+		tcp_mark_head_lost(sk, sacked_upto, fast_rexmit);
 	}
 
 	/* New heuristics: it is possible only after we switched
@@ -2047,7 +2194,7 @@ static void tcp_update_scoreboard(struct sock *sk)
 	 * Hence, we can detect timed out packets during fast
 	 * retransmit without falling to slow start.
 	 */
-	if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) {
+	if (tcp_is_fack(tp) && tcp_head_timedout(sk)) {
 		struct sk_buff *skb;
 
 		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
@@ -2059,7 +2206,7 @@ static void tcp_update_scoreboard(struct sock *sk)
 			if (!tcp_skb_timedout(sk, skb))
 				break;
 
-			if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
+			if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 				tp->lost_out += tcp_skb_pcount(skb);
 				tcp_verify_retransmit_hint(tp, skb);
@@ -2078,7 +2225,7 @@ static void tcp_update_scoreboard(struct sock *sk)
 static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
 {
 	tp->snd_cwnd = min(tp->snd_cwnd,
-			   tcp_packets_in_flight(tp)+tcp_max_burst(tp));
+			   tcp_packets_in_flight(tp) + tcp_max_burst(tp));
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -2098,15 +2245,15 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
 	struct tcp_sock *tp = tcp_sk(sk);
 	int decr = tp->snd_cwnd_cnt + 1;
 
-	if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) ||
-	    (tcp_is_reno(tp) && !(flag&FLAG_NOT_DUP))) {
-		tp->snd_cwnd_cnt = decr&1;
+	if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
+	    (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
+		tp->snd_cwnd_cnt = decr & 1;
 		decr >>= 1;
 
 		if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
 			tp->snd_cwnd -= decr;
 
-		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
+		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
 		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 }
@@ -2150,7 +2297,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
 		if (icsk->icsk_ca_ops->undo_cwnd)
 			tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
 		else
-			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
+			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
 
 		if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
 			tp->snd_ssthresh = tp->prior_ssthresh;
@@ -2169,8 +2316,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
 
 static inline int tcp_may_undo(struct tcp_sock *tp)
 {
-	return tp->undo_marker &&
-		(!tp->undo_retrans || tcp_packet_delayed(tp));
+	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
 }
 
 /* People celebrate: "We love our President!" */
@@ -2220,7 +2366,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	/* Partial ACK arrived. Force Hoe's retransmit. */
-	int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering;
+	int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
 
 	if (tcp_may_undo(tp)) {
 		/* Plain luck! Hole if filled with delayed
@@ -2289,7 +2435,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 	if (tp->retrans_out == 0)
 		tp->retrans_stamp = 0;
 
-	if (flag&FLAG_ECE)
+	if (flag & FLAG_ECE)
 		tcp_enter_cwr(sk, 1);
 
 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
@@ -2335,7 +2481,6 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
 	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 }
 
-
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
@@ -2347,38 +2492,35 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
  * It does _not_ decide what to send, it is made in function
  * tcp_xmit_retransmit_queue().
  */
-static void
-tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
+static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP));
-	int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) &&
-				    (tp->fackets_out > tp->reordering));
+	int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
+	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
+				    (tcp_fackets_out(tp) > tp->reordering));
+	int fast_rexmit = 0;
 
-	/* Some technical things:
-	 * 1. Reno does not count dupacks (sacked_out) automatically. */
-	if (!tp->packets_out)
+	if (WARN_ON(!tp->packets_out && tp->sacked_out))
 		tp->sacked_out = 0;
-
 	if (WARN_ON(!tp->sacked_out && tp->fackets_out))
 		tp->fackets_out = 0;
 
 	/* Now state machine starts.
 	 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
-	if (flag&FLAG_ECE)
+	if (flag & FLAG_ECE)
 		tp->prior_ssthresh = 0;
 
 	/* B. In all the states check for reneging SACKs. */
-	if (tp->sacked_out && tcp_check_sack_reneging(sk))
+	if (tcp_check_sack_reneging(sk, flag))
 		return;
 
 	/* C. Process data loss notification, provided it is valid. */
-	if ((flag&FLAG_DATA_LOST) &&
+	if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
 	    before(tp->snd_una, tp->high_seq) &&
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
+		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
 	}
 
@@ -2438,7 +2580,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 			do_lost = tcp_try_undo_partial(sk, pkts_acked);
 		break;
 	case TCP_CA_Loss:
-		if (flag&FLAG_DATA_ACKED)
+		if (flag & FLAG_DATA_ACKED)
 			icsk->icsk_retransmits = 0;
 		if (!tcp_try_undo_loss(sk)) {
 			tcp_moderate_cwnd(tp);
@@ -2488,7 +2630,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 		tp->undo_retrans = tp->retrans_out;
 
 		if (icsk->icsk_ca_state < TCP_CA_CWR) {
-			if (!(flag&FLAG_ECE))
+			if (!(flag & FLAG_ECE))
 				tp->prior_ssthresh = tcp_current_ssthresh(sk);
 			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 			TCP_ECN_queue_cwr(tp);
@@ -2497,10 +2639,11 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 		tp->bytes_acked = 0;
 		tp->snd_cwnd_cnt = 0;
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
+		fast_rexmit = 1;
 	}
 
-	if (do_lost || tcp_head_timedout(sk))
-		tcp_update_scoreboard(sk);
+	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
+		tcp_update_scoreboard(sk, fast_rexmit);
 	tcp_cwnd_down(sk, flag);
 	tcp_xmit_retransmit_queue(sk);
 }
@@ -2564,11 +2707,10 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
 		tcp_ack_no_tstamp(sk, seq_rtt, flag);
 }
 
-static void tcp_cong_avoid(struct sock *sk, u32 ack,
-			   u32 in_flight, int good)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good);
+	icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
 	tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -2582,7 +2724,8 @@ static void tcp_rearm_rto(struct sock *sk)
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
 	} else {
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+					  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
 	}
 }
 
@@ -2611,7 +2754,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
  */
-static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
+static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2619,29 +2762,32 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
 	u32 now = tcp_time_stamp;
 	int fully_acked = 1;
 	int flag = 0;
-	int prior_packets = tp->packets_out;
+	u32 pkts_acked = 0;
+	u32 reord = tp->packets_out;
 	s32 seq_rtt = -1;
+	s32 ca_seq_rtt = -1;
 	ktime_t last_ackt = net_invalid_timestamp();
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		u32 end_seq;
-		u32 packets_acked;
+		u32 acked_pcount;
 		u8 sacked = scb->sacked;
 
+		/* Determine how many packets and what bytes were acked, tso and else */
 		if (after(scb->end_seq, tp->snd_una)) {
 			if (tcp_skb_pcount(skb) == 1 ||
 			    !after(tp->snd_una, scb->seq))
 				break;
 
-			packets_acked = tcp_tso_acked(sk, skb);
-			if (!packets_acked)
+			acked_pcount = tcp_tso_acked(sk, skb);
+			if (!acked_pcount)
 				break;
 
 			fully_acked = 0;
 			end_seq = tp->snd_una;
 		} else {
-			packets_acked = tcp_skb_pcount(skb);
+			acked_pcount = tcp_skb_pcount(skb);
 			end_seq = scb->end_seq;
 		}
 
@@ -2651,35 +2797,34 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
 			tcp_mtup_probe_success(sk, skb);
 		}
 
-		if (sacked) {
-			if (sacked & TCPCB_RETRANS) {
-				if (sacked & TCPCB_SACKED_RETRANS)
-					tp->retrans_out -= packets_acked;
-				flag |= FLAG_RETRANS_DATA_ACKED;
-				seq_rtt = -1;
-				if ((flag & FLAG_DATA_ACKED) ||
-				    (packets_acked > 1))
-					flag |= FLAG_NONHEAD_RETRANS_ACKED;
-			} else if (seq_rtt < 0) {
-				seq_rtt = now - scb->when;
-				if (fully_acked)
-					last_ackt = skb->tstamp;
+		if (sacked & TCPCB_RETRANS) {
+			if (sacked & TCPCB_SACKED_RETRANS)
+				tp->retrans_out -= acked_pcount;
+			flag |= FLAG_RETRANS_DATA_ACKED;
+			ca_seq_rtt = -1;
+			seq_rtt = -1;
+			if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
+				flag |= FLAG_NONHEAD_RETRANS_ACKED;
+		} else {
+			ca_seq_rtt = now - scb->when;
+			last_ackt = skb->tstamp;
+			if (seq_rtt < 0) {
+				seq_rtt = ca_seq_rtt;
 			}
-
-			if (sacked & TCPCB_SACKED_ACKED)
-				tp->sacked_out -= packets_acked;
-			if (sacked & TCPCB_LOST)
-				tp->lost_out -= packets_acked;
-
-			if ((sacked & TCPCB_URG) && tp->urg_mode &&
-			    !before(end_seq, tp->snd_up))
-				tp->urg_mode = 0;
-		} else if (seq_rtt < 0) {
-			seq_rtt = now - scb->when;
-			if (fully_acked)
-				last_ackt = skb->tstamp;
+			if (!(sacked & TCPCB_SACKED_ACKED))
+				reord = min(pkts_acked, reord);
 		}
-		tp->packets_out -= packets_acked;
+
+		if (sacked & TCPCB_SACKED_ACKED)
+			tp->sacked_out -= acked_pcount;
+		if (sacked & TCPCB_LOST)
+			tp->lost_out -= acked_pcount;
+
+		if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up)))
+			tp->urg_mode = 0;
+
+		tp->packets_out -= acked_pcount;
+		pkts_acked += acked_pcount;
 
 		/* Initial outgoing SYN's get put onto the write_queue
 		 * just like anything else we transmit.  It is not
@@ -2699,24 +2844,29 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
 			break;
 
 		tcp_unlink_write_queue(skb, sk);
-		sk_stream_free_skb(sk, skb);
+		sk_wmem_free_skb(sk, skb);
 		tcp_clear_all_retrans_hints(tp);
 	}
 
+	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
+		flag |= FLAG_SACK_RENEGING;
+
 	if (flag & FLAG_ACKED) {
-		u32 pkts_acked = prior_packets - tp->packets_out;
 		const struct tcp_congestion_ops *ca_ops
 			= inet_csk(sk)->icsk_ca_ops;
 
 		tcp_ack_update_rtt(sk, flag, seq_rtt);
 		tcp_rearm_rto(sk);
 
-		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
-		/* hint's skb might be NULL but we don't need to care */
-		tp->fastpath_cnt_hint -= min_t(u32, pkts_acked,
-					       tp->fastpath_cnt_hint);
-		if (tcp_is_reno(tp))
+		if (tcp_is_reno(tp)) {
 			tcp_remove_reno_sacks(sk, pkts_acked);
+		} else {
+			/* Non-retransmitted hole got filled? That's reordering */
+			if (reord < prior_fackets)
+				tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+		}
+
+		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
 		if (ca_ops->pkts_acked) {
 			s32 rtt_us = -1;
@@ -2729,8 +2879,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
 						 net_invalid_timestamp()))
 					rtt_us = ktime_us_delta(ktime_get_real(),
 								last_ackt);
-				else if (seq_rtt > 0)
-					rtt_us = jiffies_to_usecs(seq_rtt);
+				else if (ca_seq_rtt > 0)
+					rtt_us = jiffies_to_usecs(ca_seq_rtt);
 			}
 
 			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
@@ -2760,7 +2910,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p)
 		}
 	}
 #endif
-	*seq_rtt_p = seq_rtt;
 	return flag;
 }
 
@@ -2771,8 +2920,7 @@ static void tcp_ack_probe(struct sock *sk)
 
 	/* Was it a usable window open? */
 
-	if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
-		   tp->snd_una + tp->snd_wnd)) {
+	if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
 		icsk->icsk_backoff = 0;
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
 		/* Socket must be waked up by subsequent tcp_data_snd_check().
@@ -2801,8 +2949,9 @@ static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 /* Check that window update is acceptable.
  * The function assumes that snd_una<=ack<=snd_next.
  */
-static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
-					const u32 ack_seq, const u32 nwin)
+static inline int tcp_may_update_window(const struct tcp_sock *tp,
+					const u32 ack, const u32 ack_seq,
+					const u32 nwin)
 {
 	return (after(ack, tp->snd_una) ||
 		after(ack_seq, tp->snd_wl1) ||
@@ -2871,7 +3020,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
 
 static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
-	if (flag&FLAG_ECE)
+	if (flag & FLAG_ECE)
 		tcp_ratehalving_spur_to_response(sk);
 	else
 		tcp_undo_cwr(sk, 1);
@@ -2914,7 +3063,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
 	tcp_verify_left_out(tp);
 
 	/* Duplicate the behavior from Loss state (fastretrans_alert) */
-	if (flag&FLAG_DATA_ACKED)
+	if (flag & FLAG_DATA_ACKED)
 		inet_csk(sk)->icsk_retransmits = 0;
 
 	if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
@@ -2931,16 +3080,16 @@ static int tcp_process_frto(struct sock *sk, int flag)
 		 * ACK isn't duplicate nor advances window, e.g., opposite dir
 		 * data, winupdate
 		 */
-		if (!(flag&FLAG_ANY_PROGRESS) && (flag&FLAG_NOT_DUP))
+		if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
 			return 1;
 
-		if (!(flag&FLAG_DATA_ACKED)) {
+		if (!(flag & FLAG_DATA_ACKED)) {
 			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
 					    flag);
 			return 1;
 		}
 	} else {
-		if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+		if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
 			/* Prevent sending of new data. */
 			tp->snd_cwnd = min(tp->snd_cwnd,
 					   tcp_packets_in_flight(tp));
@@ -2948,10 +3097,12 @@ static int tcp_process_frto(struct sock *sk, int flag)
 		}
 
 		if ((tp->frto_counter >= 2) &&
-		    (!(flag&FLAG_FORWARD_PROGRESS) ||
-		     ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
+		    (!(flag & FLAG_FORWARD_PROGRESS) ||
+		     ((flag & FLAG_DATA_SACKED) &&
+		      !(flag & FLAG_ONLY_ORIG_SACKED)))) {
 			/* RFC4138 shortcoming (see comment above) */
-			if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP))
+			if (!(flag & FLAG_FORWARD_PROGRESS) &&
+			    (flag & FLAG_NOT_DUP))
 				return 1;
 
 			tcp_enter_frto_loss(sk, 3, flag);
@@ -2960,17 +3111,13 @@ static int tcp_process_frto(struct sock *sk, int flag)
 	}
 
 	if (tp->frto_counter == 1) {
-		/* Sending of the next skb must be allowed or no F-RTO */
-		if (!tcp_send_head(sk) ||
-		    after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
-				     tp->snd_una + tp->snd_wnd)) {
-			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3),
-					    flag);
-			return 1;
-		}
-
+		/* tcp_may_send_now needs to see updated state */
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
 		tp->frto_counter = 2;
+
+		if (!tcp_may_send_now(sk))
+			tcp_enter_frto_loss(sk, 2, flag);
+
 		return 1;
 	} else {
 		switch (sysctl_tcp_frto_response) {
@@ -3000,7 +3147,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	u32 prior_in_flight;
-	s32 seq_rtt;
+	u32 prior_fackets;
 	int prior_packets;
 	int frto_cwnd = 0;
 
@@ -3021,10 +3168,14 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 			tp->bytes_acked += ack - prior_snd_una;
 		else if (icsk->icsk_ca_state == TCP_CA_Loss)
 			/* we assume just one segment left network */
-			tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache);
+			tp->bytes_acked += min(ack - prior_snd_una,
+					       tp->mss_cache);
 	}
 
-	if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
+	prior_fackets = tp->fackets_out;
+	prior_in_flight = tcp_packets_in_flight(tp);
+
+	if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
 		/* Window is constant, pure forward advance.
 		 * No more checks are required.
 		 * Note, we use the fact that SND.UNA>=SND.WL2.
@@ -3062,29 +3213,28 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	if (!prior_packets)
 		goto no_queue;
 
-	prior_in_flight = tcp_packets_in_flight(tp);
-
 	/* See if we can take anything off of the retransmit queue. */
-	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
+	flag |= tcp_clean_rtx_queue(sk, prior_fackets);
 
+	if (tp->frto_counter)
+		frto_cwnd = tcp_process_frto(sk, flag);
 	/* Guarantee sacktag reordering detection against wrap-arounds */
 	if (before(tp->frto_highmark, tp->snd_una))
 		tp->frto_highmark = 0;
-	if (tp->frto_counter)
-		frto_cwnd = tcp_process_frto(sk, flag);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		/* Advance CWND, if state allows this. */
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
 		    tcp_may_raise_cwnd(sk, flag))
-			tcp_cong_avoid(sk, ack, prior_in_flight, 0);
-		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag);
+			tcp_cong_avoid(sk, ack, prior_in_flight);
+		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
+				      flag);
 	} else {
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
-			tcp_cong_avoid(sk, ack, prior_in_flight, 1);
+			tcp_cong_avoid(sk, ack, prior_in_flight);
 	}
 
-	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
+	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
 		dst_confirm(sk->sk_dst_cache);
 
 	return 1;
@@ -3109,100 +3259,99 @@ uninteresting_ack:
 	return 0;
 }
 
-
 /* Look for tcp options. Normally only called on SYN and SYNACK packets.
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
  */
-void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab)
+void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
+		       int estab)
 {
 	unsigned char *ptr;
 	struct tcphdr *th = tcp_hdr(skb);
-	int length=(th->doff*4)-sizeof(struct tcphdr);
+	int length = (th->doff * 4) - sizeof(struct tcphdr);
 
 	ptr = (unsigned char *)(th + 1);
 	opt_rx->saw_tstamp = 0;
 
 	while (length > 0) {
-		int opcode=*ptr++;
+		int opcode = *ptr++;
 		int opsize;
 
 		switch (opcode) {
-			case TCPOPT_EOL:
+		case TCPOPT_EOL:
+			return;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			opsize = *ptr++;
+			if (opsize < 2) /* "silly options" */
 				return;
-			case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
-				length--;
-				continue;
-			default:
-				opsize=*ptr++;
-				if (opsize < 2) /* "silly options" */
-					return;
-				if (opsize > length)
-					return;	/* don't parse partial options */
-				switch (opcode) {
-				case TCPOPT_MSS:
-					if (opsize==TCPOLEN_MSS && th->syn && !estab) {
-						u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
-						if (in_mss) {
-							if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
-								in_mss = opt_rx->user_mss;
-							opt_rx->mss_clamp = in_mss;
-						}
+			if (opsize > length)
+				return;	/* don't parse partial options */
+			switch (opcode) {
+			case TCPOPT_MSS:
+				if (opsize == TCPOLEN_MSS && th->syn && !estab) {
+					u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
+					if (in_mss) {
+						if (opt_rx->user_mss &&
+						    opt_rx->user_mss < in_mss)
+							in_mss = opt_rx->user_mss;
+						opt_rx->mss_clamp = in_mss;
 					}
-					break;
-				case TCPOPT_WINDOW:
-					if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
-						if (sysctl_tcp_window_scaling) {
-							__u8 snd_wscale = *(__u8 *) ptr;
-							opt_rx->wscale_ok = 1;
-							if (snd_wscale > 14) {
-								if (net_ratelimit())
-									printk(KERN_INFO "tcp_parse_options: Illegal window "
-									       "scaling value %d >14 received.\n",
-									       snd_wscale);
-								snd_wscale = 14;
-							}
-							opt_rx->snd_wscale = snd_wscale;
-						}
-					break;
-				case TCPOPT_TIMESTAMP:
-					if (opsize==TCPOLEN_TIMESTAMP) {
-						if ((estab && opt_rx->tstamp_ok) ||
-						    (!estab && sysctl_tcp_timestamps)) {
-							opt_rx->saw_tstamp = 1;
-							opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
-							opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
-						}
-					}
-					break;
-				case TCPOPT_SACK_PERM:
-					if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
-						if (sysctl_tcp_sack) {
-							opt_rx->sack_ok = 1;
-							tcp_sack_reset(opt_rx);
-						}
+				}
+				break;
+			case TCPOPT_WINDOW:
+				if (opsize == TCPOLEN_WINDOW && th->syn &&
+				    !estab && sysctl_tcp_window_scaling) {
+					__u8 snd_wscale = *(__u8 *)ptr;
+					opt_rx->wscale_ok = 1;
+					if (snd_wscale > 14) {
+						if (net_ratelimit())
+							printk(KERN_INFO "tcp_parse_options: Illegal window "
+							       "scaling value %d >14 received.\n",
+							       snd_wscale);
+						snd_wscale = 14;
 					}
-					break;
+					opt_rx->snd_wscale = snd_wscale;
+				}
+				break;
+			case TCPOPT_TIMESTAMP:
+				if ((opsize == TCPOLEN_TIMESTAMP) &&
+				    ((estab && opt_rx->tstamp_ok) ||
+				     (!estab && sysctl_tcp_timestamps))) {
+					opt_rx->saw_tstamp = 1;
+					opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
+					opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
+				}
+				break;
+			case TCPOPT_SACK_PERM:
+				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
+				    !estab && sysctl_tcp_sack) {
+					opt_rx->sack_ok = 1;
+					tcp_sack_reset(opt_rx);
+				}
+				break;
 
-				case TCPOPT_SACK:
-					if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
-					   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
-					   opt_rx->sack_ok) {
-						TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
-					}
-					break;
+			case TCPOPT_SACK:
+				if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+				   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
+				   opt_rx->sack_ok) {
+					TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
+				}
+				break;
 #ifdef CONFIG_TCP_MD5SIG
-				case TCPOPT_MD5SIG:
-					/*
-					 * The MD5 Hash has already been
-					 * checked (see tcp_v{4,6}_do_rcv()).
-					 */
-					break;
+			case TCPOPT_MD5SIG:
+				/*
+				 * The MD5 Hash has already been
+				 * checked (see tcp_v{4,6}_do_rcv()).
+				 */
+				break;
 #endif
-				}
+			}
 
-				ptr+=opsize-2;
-				length-=opsize;
+			ptr += opsize-2;
+			length -= opsize;
 		}
 	}
 }
@@ -3213,7 +3362,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 				  struct tcp_sock *tp)
 {
-	if (th->doff == sizeof(struct tcphdr)>>2) {
+	if (th->doff == sizeof(struct tcphdr) >> 2) {
 		tp->rx_opt.saw_tstamp = 0;
 		return 0;
 	} else if (tp->rx_opt.tstamp_ok &&
@@ -3298,7 +3447,8 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
 		(s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
 }
 
-static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb)
+static inline int tcp_paws_discard(const struct sock *sk,
+				   const struct sk_buff *skb)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
@@ -3330,16 +3480,16 @@ static void tcp_reset(struct sock *sk)
 {
 	/* We want the right error as BSD sees it (and indeed as we do). */
 	switch (sk->sk_state) {
-		case TCP_SYN_SENT:
-			sk->sk_err = ECONNREFUSED;
-			break;
-		case TCP_CLOSE_WAIT:
-			sk->sk_err = EPIPE;
-			break;
-		case TCP_CLOSE:
-			return;
-		default:
-			sk->sk_err = ECONNRESET;
+	case TCP_SYN_SENT:
+		sk->sk_err = ECONNREFUSED;
+		break;
+	case TCP_CLOSE_WAIT:
+		sk->sk_err = EPIPE;
+		break;
+	case TCP_CLOSE:
+		return;
+	default:
+		sk->sk_err = ECONNRESET;
 	}
 
 	if (!sock_flag(sk, SOCK_DEAD))
@@ -3372,43 +3522,43 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 	sock_set_flag(sk, SOCK_DONE);
 
 	switch (sk->sk_state) {
-		case TCP_SYN_RECV:
-		case TCP_ESTABLISHED:
-			/* Move to CLOSE_WAIT */
-			tcp_set_state(sk, TCP_CLOSE_WAIT);
-			inet_csk(sk)->icsk_ack.pingpong = 1;
-			break;
+	case TCP_SYN_RECV:
+	case TCP_ESTABLISHED:
+		/* Move to CLOSE_WAIT */
+		tcp_set_state(sk, TCP_CLOSE_WAIT);
+		inet_csk(sk)->icsk_ack.pingpong = 1;
+		break;
 
-		case TCP_CLOSE_WAIT:
-		case TCP_CLOSING:
-			/* Received a retransmission of the FIN, do
-			 * nothing.
-			 */
-			break;
-		case TCP_LAST_ACK:
-			/* RFC793: Remain in the LAST-ACK state. */
-			break;
+	case TCP_CLOSE_WAIT:
+	case TCP_CLOSING:
+		/* Received a retransmission of the FIN, do
+		 * nothing.
+		 */
+		break;
+	case TCP_LAST_ACK:
+		/* RFC793: Remain in the LAST-ACK state. */
+		break;
 
-		case TCP_FIN_WAIT1:
-			/* This case occurs when a simultaneous close
-			 * happens, we must ack the received FIN and
-			 * enter the CLOSING state.
-			 */
-			tcp_send_ack(sk);
-			tcp_set_state(sk, TCP_CLOSING);
-			break;
-		case TCP_FIN_WAIT2:
-			/* Received a FIN -- send ACK and enter TIME_WAIT. */
-			tcp_send_ack(sk);
-			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
-			break;
-		default:
-			/* Only TCP_LISTEN and TCP_CLOSE are left, in these
-			 * cases we should never reach this piece of code.
-			 */
-			printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
-			       __FUNCTION__, sk->sk_state);
-			break;
+	case TCP_FIN_WAIT1:
+		/* This case occurs when a simultaneous close
+		 * happens, we must ack the received FIN and
+		 * enter the CLOSING state.
+		 */
+		tcp_send_ack(sk);
+		tcp_set_state(sk, TCP_CLOSING);
+		break;
+	case TCP_FIN_WAIT2:
+		/* Received a FIN -- send ACK and enter TIME_WAIT. */
+		tcp_send_ack(sk);
+		tcp_time_wait(sk, TCP_TIME_WAIT, 0);
+		break;
+	default:
+		/* Only TCP_LISTEN and TCP_CLOSE are left, in these
+		 * cases we should never reach this piece of code.
+		 */
+		printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
+		       __FUNCTION__, sk->sk_state);
+		break;
 	}
 
 	/* It _is_ possible, that we have something out-of-order _after_ FIN.
@@ -3417,7 +3567,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 	__skb_queue_purge(&tp->out_of_order_queue);
 	if (tcp_is_sack(tp))
 		tcp_sack_reset(&tp->rx_opt);
-	sk_stream_mem_reclaim(sk);
+	sk_mem_reclaim(sk);
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		sk->sk_state_change(sk);
@@ -3425,13 +3575,14 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 		/* Do not send POLL_HUP for half duplex close. */
 		if (sk->sk_shutdown == SHUTDOWN_MASK ||
 		    sk->sk_state == TCP_CLOSE)
-			sk_wake_async(sk, 1, POLL_HUP);
+			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
 		else
-			sk_wake_async(sk, 1, POLL_IN);
+			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
 	}
 }
 
-static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
+static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
+				  u32 end_seq)
 {
 	if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
 		if (before(seq, sp->start_seq))
@@ -3454,7 +3605,8 @@ static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
 		tp->rx_opt.dsack = 1;
 		tp->duplicate_sack[0].start_seq = seq;
 		tp->duplicate_sack[0].end_seq = end_seq;
-		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, 4 - tp->rx_opt.tstamp_ok);
+		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1,
+					   4 - tp->rx_opt.tstamp_ok);
 	}
 }
 
@@ -3494,12 +3646,12 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 {
 	int this_sack;
 	struct tcp_sack_block *sp = &tp->selective_acks[0];
-	struct tcp_sack_block *swalk = sp+1;
+	struct tcp_sack_block *swalk = sp + 1;
 
 	/* See if the recent change to the first SACK eats into
 	 * or hits the sequence space of other SACK blocks, if so coalesce.
 	 */
-	for (this_sack = 1; this_sack < tp->rx_opt.num_sacks; ) {
+	for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
 		if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
 			int i;
 
@@ -3507,16 +3659,19 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 			 * Decrease num_sacks.
 			 */
 			tp->rx_opt.num_sacks--;
-			tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
-			for (i=this_sack; i < tp->rx_opt.num_sacks; i++)
-				sp[i] = sp[i+1];
+			tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
+						   tp->rx_opt.dsack,
+						   4 - tp->rx_opt.tstamp_ok);
+			for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
+				sp[i] = sp[i + 1];
 			continue;
 		}
 		this_sack++, swalk++;
 	}
 }
 
-static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
+static inline void tcp_sack_swap(struct tcp_sack_block *sack1,
+				 struct tcp_sack_block *sack2)
 {
 	__u32 tmp;
 
@@ -3539,11 +3694,11 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 	if (!cur_sacks)
 		goto new_sack;
 
-	for (this_sack=0; this_sack<cur_sacks; this_sack++, sp++) {
+	for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
 		if (tcp_sack_extend(sp, seq, end_seq)) {
 			/* Rotate this_sack to the first one. */
-			for (; this_sack>0; this_sack--, sp--)
-				tcp_sack_swap(sp, sp-1);
+			for (; this_sack > 0; this_sack--, sp--)
+				tcp_sack_swap(sp, sp - 1);
 			if (cur_sacks > 1)
 				tcp_sack_maybe_coalesce(tp);
 			return;
@@ -3562,14 +3717,15 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 		sp--;
 	}
 	for (; this_sack > 0; this_sack--, sp--)
-		*sp = *(sp-1);
+		*sp = *(sp - 1);
 
 new_sack:
 	/* Build the new head SACK, and we're done. */
 	sp->start_seq = seq;
 	sp->end_seq = end_seq;
 	tp->rx_opt.num_sacks++;
-	tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+	tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack,
+				   4 - tp->rx_opt.tstamp_ok);
 }
 
 /* RCV.NXT advances, some SACKs should be eaten. */
@@ -3587,7 +3743,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 		return;
 	}
 
-	for (this_sack = 0; this_sack < num_sacks; ) {
+	for (this_sack = 0; this_sack < num_sacks;) {
 		/* Check if the start of the sack is covered by RCV.NXT. */
 		if (!before(tp->rcv_nxt, sp->start_seq)) {
 			int i;
@@ -3606,7 +3762,9 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	}
 	if (num_sacks != tp->rx_opt.num_sacks) {
 		tp->rx_opt.num_sacks = num_sacks;
-		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
+					   tp->rx_opt.dsack,
+					   4 - tp->rx_opt.tstamp_ok);
 	}
 }
 
@@ -3659,14 +3817,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
 		goto drop;
 
-	__skb_pull(skb, th->doff*4);
+	__skb_pull(skb, th->doff * 4);
 
 	TCP_ECN_accept_cwr(tp, skb);
 
 	if (tp->rx_opt.dsack) {
 		tp->rx_opt.dsack = 0;
 		tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
-						    4 - tp->rx_opt.tstamp_ok);
+					     4 - tp->rx_opt.tstamp_ok);
 	}
 
 	/*  Queue data for delivery to the user.
@@ -3682,7 +3840,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
 		    sock_owned_by_user(sk) && !tp->urg_data) {
 			int chunk = min_t(unsigned int, skb->len,
-							tp->ucopy.len);
+					  tp->ucopy.len);
 
 			__set_current_state(TASK_RUNNING);
 
@@ -3700,12 +3858,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 queue_and_out:
 			if (eaten < 0 &&
 			    (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-			     !sk_stream_rmem_schedule(sk, skb))) {
+			     !sk_rmem_schedule(sk, skb->truesize))) {
 				if (tcp_prune_queue(sk) < 0 ||
-				    !sk_stream_rmem_schedule(sk, skb))
+				    !sk_rmem_schedule(sk, skb->truesize))
 					goto drop;
 			}
-			sk_stream_set_owner_r(skb, sk);
+			skb_set_owner_r(skb, sk);
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		}
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
@@ -3774,9 +3932,9 @@ drop:
 	TCP_ECN_check_ce(tp, skb);
 
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    !sk_stream_rmem_schedule(sk, skb)) {
+	    !sk_rmem_schedule(sk, skb->truesize)) {
 		if (tcp_prune_queue(sk) < 0 ||
-		    !sk_stream_rmem_schedule(sk, skb))
+		    !sk_rmem_schedule(sk, skb->truesize))
 			goto drop;
 	}
 
@@ -3787,7 +3945,7 @@ drop:
 	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
 		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
 
-	sk_stream_set_owner_r(skb, sk);
+	skb_set_owner_r(skb, sk);
 
 	if (!skb_peek(&tp->out_of_order_queue)) {
 		/* Initial out of order segment, build 1 SACK. */
@@ -3799,7 +3957,7 @@ drop:
 			tp->selective_acks[0].end_seq =
 						TCP_SKB_CB(skb)->end_seq;
 		}
-		__skb_queue_head(&tp->out_of_order_queue,skb);
+		__skb_queue_head(&tp->out_of_order_queue, skb);
 	} else {
 		struct sk_buff *skb1 = tp->out_of_order_queue.prev;
 		u32 seq = TCP_SKB_CB(skb)->seq;
@@ -3822,10 +3980,10 @@ drop:
 			if (!after(TCP_SKB_CB(skb1)->seq, seq))
 				break;
 		} while ((skb1 = skb1->prev) !=
-			 (struct sk_buff*)&tp->out_of_order_queue);
+			 (struct sk_buff *)&tp->out_of_order_queue);
 
 		/* Do skb overlap to previous one? */
-		if (skb1 != (struct sk_buff*)&tp->out_of_order_queue &&
+		if (skb1 != (struct sk_buff *)&tp->out_of_order_queue &&
 		    before(seq, TCP_SKB_CB(skb1)->end_seq)) {
 			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
 				/* All the bits are present. Drop. */
@@ -3835,7 +3993,8 @@ drop:
 			}
 			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
 				/* Partial overlap. */
-				tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq);
+				tcp_dsack_set(tp, seq,
+					      TCP_SKB_CB(skb1)->end_seq);
 			} else {
 				skb1 = skb1->prev;
 			}
@@ -3844,15 +4003,17 @@ drop:
 
 		/* And clean segments covered by new one as whole. */
 		while ((skb1 = skb->next) !=
-		       (struct sk_buff*)&tp->out_of_order_queue &&
+		       (struct sk_buff *)&tp->out_of_order_queue &&
 		       after(end_seq, TCP_SKB_CB(skb1)->seq)) {
-		       if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-			       tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq);
-			       break;
-		       }
-		       __skb_unlink(skb1, &tp->out_of_order_queue);
-		       tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq);
-		       __kfree_skb(skb1);
+			if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+				tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+						 end_seq);
+				break;
+			}
+			__skb_unlink(skb1, &tp->out_of_order_queue);
+			tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+					 TCP_SKB_CB(skb1)->end_seq);
+			__kfree_skb(skb1);
 		}
 
 add_sack:
@@ -3875,7 +4036,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 
 	/* First, check that queue is collapsible and find
 	 * the point where collapsing can be useful. */
-	for (skb = head; skb != tail; ) {
+	for (skb = head; skb != tail;) {
 		/* No new bits? It is possible on ofo queue. */
 		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
 			struct sk_buff *next = skb->next;
@@ -3913,9 +4074,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		/* Too big header? This can happen with IPv6. */
 		if (copy < 0)
 			return;
-		if (end-start < copy)
-			copy = end-start;
-		nskb = alloc_skb(copy+header, GFP_ATOMIC);
+		if (end - start < copy)
+			copy = end - start;
+		nskb = alloc_skb(copy + header, GFP_ATOMIC);
 		if (!nskb)
 			return;
 
@@ -3929,7 +4090,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
 		__skb_insert(nskb, skb->prev, skb, list);
-		sk_stream_set_owner_r(nskb, sk);
+		skb_set_owner_r(nskb, sk);
 
 		/* Copy data, releasing collapsed skbs. */
 		while (copy > 0) {
@@ -4025,9 +4186,9 @@ static int tcp_prune_queue(struct sock *sk)
 	tcp_collapse_ofo_queue(sk);
 	tcp_collapse(sk, &sk->sk_receive_queue,
 		     sk->sk_receive_queue.next,
-		     (struct sk_buff*)&sk->sk_receive_queue,
+		     (struct sk_buff *)&sk->sk_receive_queue,
 		     tp->copied_seq, tp->rcv_nxt);
-	sk_stream_mem_reclaim(sk);
+	sk_mem_reclaim(sk);
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;
@@ -4047,7 +4208,7 @@ static int tcp_prune_queue(struct sock *sk)
 		 */
 		if (tcp_is_sack(tp))
 			tcp_sack_reset(&tp->rx_opt);
-		sk_stream_mem_reclaim(sk);
+		sk_mem_reclaim(sk);
 	}
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
@@ -4064,7 +4225,6 @@ static int tcp_prune_queue(struct sock *sk)
 	return -1;
 }
 
-
 /* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
  * As additional protections, we do not touch cwnd in retransmission phases,
  * and if application hit its sndbuf limit recently.
@@ -4126,8 +4286,8 @@ static void tcp_new_space(struct sock *sk)
 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
 			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
-						   tp->reordering + 1);
-		sndmem *= 2*demanded;
+				     tp->reordering + 1);
+		sndmem *= 2 * demanded;
 		if (sndmem > sk->sk_sndbuf)
 			sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
 		tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -4168,8 +4328,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	    /* We ACK each frame or... */
 	    tcp_in_quickack_mode(sk) ||
 	    /* We have out of order data. */
-	    (ofo_possible &&
-	     skb_peek(&tp->out_of_order_queue))) {
+	    (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
 		/* Then ack it now */
 		tcp_send_ack(sk);
 	} else {
@@ -4197,7 +4356,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
  *	either form (or just set the sysctl tcp_stdurg).
  */
 
-static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
+static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 ptr = ntohs(th->urg_ptr);
@@ -4246,8 +4405,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 	 * buggy users.
 	 */
 	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
-	    !sock_flag(sk, SOCK_URGINLINE) &&
-	    tp->copied_seq != tp->rcv_nxt) {
+	    !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
 		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 		tp->copied_seq++;
 		if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
@@ -4256,8 +4414,8 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 		}
 	}
 
-	tp->urg_data   = TCP_URG_NOTYET;
-	tp->urg_seq    = ptr;
+	tp->urg_data = TCP_URG_NOTYET;
+	tp->urg_seq = ptr;
 
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
@@ -4270,7 +4428,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
 
 	/* Check if we get a new urgent pointer - normally not. */
 	if (th->urg)
-		tcp_check_urg(sk,th);
+		tcp_check_urg(sk, th);
 
 	/* Do we wait for any urgent data? - normally not... */
 	if (tp->urg_data == TCP_URG_NOTYET) {
@@ -4312,7 +4470,8 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 	return err;
 }
 
-static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+static __sum16 __tcp_checksum_complete_user(struct sock *sk,
+					    struct sk_buff *skb)
 {
 	__sum16 result;
 
@@ -4326,14 +4485,16 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb
 	return result;
 }
 
-static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_checksum_complete_user(struct sock *sk,
+					     struct sk_buff *skb)
 {
 	return !skb_csum_unnecessary(skb) &&
-		__tcp_checksum_complete_user(sk, skb);
+	       __tcp_checksum_complete_user(sk, skb);
 }
 
 #ifdef CONFIG_NET_DMA
-static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
+static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
+				  int hlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int chunk = skb->len - hlen;
@@ -4349,7 +4510,9 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
 	if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
 
 		dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
-			skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
+							 skb, hlen,
+							 tp->ucopy.iov, chunk,
+							 tp->ucopy.pinned_list);
 
 		if (dma_cookie < 0)
 			goto out;
@@ -4431,7 +4594,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	 */
 
 	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
-		TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
+	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
 		int tcp_header_len = tp->tcp_header_len;
 
 		/* Timestamp header prediction: tcp_header_len
@@ -4500,7 +4663,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 					eaten = 1;
 				}
 #endif
-				if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) {
+				if (tp->ucopy.task == current &&
+				    sock_owned_by_user(sk) && !copied_early) {
 					__set_current_state(TASK_RUNNING);
 
 					if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
@@ -4547,9 +4711,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
 
 				/* Bulk data transfer: receiver */
-				__skb_pull(skb,tcp_header_len);
+				__skb_pull(skb, tcp_header_len);
 				__skb_queue_tail(&sk->sk_receive_queue, skb);
-				sk_stream_set_owner_r(skb, sk);
+				skb_set_owner_r(skb, sk);
 				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 			}
 
@@ -4579,7 +4743,7 @@ no_ack:
 	}
 
 slow_path:
-	if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb))
+	if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
 		goto csum_error;
 
 	/*
@@ -4786,7 +4950,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 
 		if (!sock_flag(sk, SOCK_DEAD)) {
 			sk->sk_state_change(sk);
-			sk_wake_async(sk, 0, POLL_OUT);
+			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
 		}
 
 		if (sk->sk_write_pending ||
@@ -4829,7 +4993,8 @@ discard:
 	}
 
 	/* PAWS check. */
-	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0))
+	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
+	    tcp_paws_check(&tp->rx_opt, 0))
 		goto discard_and_undo;
 
 	if (th->syn) {
@@ -4864,7 +5029,6 @@ discard:
 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
-
 		tcp_send_synack(sk);
 #if 0
 		/* Note, we could accept data and URG from this segment.
@@ -4896,7 +5060,6 @@ reset_and_undo:
 	return 1;
 }
 
-
 /*
  *	This function implements the receiving procedure of RFC 793 for
  *	all states except ESTABLISHED and TIME_WAIT.
@@ -5016,9 +5179,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 * are not waked up, because sk->sk_sleep ==
 				 * NULL and sk->sk_socket == NULL.
 				 */
-				if (sk->sk_socket) {
-					sk_wake_async(sk,0,POLL_OUT);
-				}
+				if (sk->sk_socket)
+					sk_wake_async(sk,
+						      SOCK_WAKE_IO, POLL_OUT);
 
 				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 				tp->snd_wnd = ntohs(th->window) <<
@@ -5030,8 +5193,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 * and does not calculate rtt.
 				 * Fix it at least with timestamps.
 				 */
-				if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
-				    !tp->srtt)
+				if (tp->rx_opt.saw_tstamp &&
+				    tp->rx_opt.rcv_tsecr && !tp->srtt)
 					tcp_ack_saw_tstamp(sk, 0);
 
 				if (tp->rx_opt.tstamp_ok)