summaryrefslogtreecommitdiffstats
path: root/sys/netinet/tcp_input.c
diff options
context:
space:
mode:
authorprovos <provos@openbsd.org>1998-11-17 19:23:00 +0000
committerprovos <provos@openbsd.org>1998-11-17 19:23:00 +0000
commit201dac0fe332deeedd3063648eea7a618c7049bf (patch)
treec783d955167db8ef59364a6dbe29ae101d63195a /sys/netinet/tcp_input.c
parentAdd RCS Ids from the EOM repository (diff)
downloadwireguard-openbsd-201dac0fe332deeedd3063648eea7a618c7049bf.tar.xz
wireguard-openbsd-201dac0fe332deeedd3063648eea7a618c7049bf.zip
NewReno, SACK and FACK support for TCP, adapted from code for BSDI
by Hari Balakrishnan (hari@lcs.mit.edu), Tom Henderson (tomh@cs.berkeley.edu) and Venkat Padmanabhan (padmanab@cs.berkeley.edu) as part of the Daedalus research group at the University of California, (http://daedalus.cs.berkeley.edu). [I was able to do this on time spent at the Center for Information Technology Integration (citi.umich.edu)]
Diffstat (limited to 'sys/netinet/tcp_input.c')
-rw-r--r--sys/netinet/tcp_input.c705
1 files changed, 694 insertions, 11 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index cacdcc1b9e8..72f19aafa92 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp_input.c,v 1.20 1998/10/28 21:34:32 provos Exp $ */
+/* $OpenBSD: tcp_input.c,v 1.21 1998/11/17 19:23:01 provos Exp $ */
/* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */
/*
@@ -609,6 +609,11 @@ findpcb:
if (tp->t_state != TCPS_SYN_RECEIVED)
tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+#ifdef TCP_SACK
+ if (!tp->sack_disable)
+ tcp_del_sackholes(tp, ti); /* Delete stale SACK holes */
+#endif /* TCP_SACK */
+
/*
* Process options if not in LISTEN state,
* else do it below (after getting remote address).
@@ -617,6 +622,12 @@ findpcb:
tcp_dooptions(tp, optp, optlen, ti,
&ts_present, &ts_val, &ts_ecr);
+#ifdef TCP_SACK
+ if (!tp->sack_disable) {
+ tp->rcv_laststart = ti->ti_seq; /* last rec'vd segment*/
+ tp->rcv_lastend = ti->ti_seq + ti->ti_len;
+ }
+#endif /* TCP_SACK */
/*
* Header prediction: check for the two common cases
* of a uni-directional data xfer. If the packet has
@@ -652,7 +663,7 @@ findpcb:
if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
tp->snd_cwnd >= tp->snd_wnd &&
- tp->t_dupacks < tcprexmtthresh) {
+ tp->t_dupacks == 0) {
/*
* this is a pure ack for outstanding data.
*/
@@ -667,6 +678,10 @@ findpcb:
tcpstat.tcps_rcvackbyte += acked;
sbdrop(&so->so_snd, acked);
tp->snd_una = ti->ti_ack;
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ tp->snd_fack = tp->snd_una;
+ tp->retran_data = 0;
+#endif /* TCP_FACK */
m_freem(m);
/*
@@ -697,6 +712,11 @@ findpcb:
* with nothing on the reassembly queue and
* we have enough buffer space to take it.
*/
+#ifdef TCP_SACK
+ /* Clean receiver SACK report if present */
+ if (!tp->sack_disable && tp->rcv_numsacks)
+ tcp_clean_sackreport(tp);
+#endif /* TCP_SACK */
++tcpstat.tcps_preddat;
tp->rcv_nxt += ti->ti_len;
tcpstat.tcps_rcvpack++;
@@ -822,6 +842,17 @@ findpcb:
if (optp)
tcp_dooptions(tp, optp, optlen, ti,
&ts_present, &ts_val, &ts_ecr);
+#ifdef TCP_SACK
+ /*
+ * If peer did not send a SACK_PERMITTED option (i.e., if
+ * tcp_dooptions() did not set TF_SACK_PERMIT), set
+ * sack_disable to 1 if it is currently 0.
+ */
+ if (!tp->sack_disable)
+ if ((tp->t_flags & TF_SACK_PERMIT) == 0)
+ tp->sack_disable = 1;
+#endif
+
if (iss)
tp->iss = iss;
else
@@ -833,6 +864,14 @@ findpcb:
#endif /* !TCP_COMPAT_42 */
tp->irs = ti->ti_seq;
tcp_sendseqinit(tp);
+#if defined (TCP_SACK) || defined (TCP_NEWRENO)
+ tp->snd_last = tp->snd_una;
+#endif /* TCP_SACK || TCP_NEWRENO */
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ tp->snd_fack = tp->snd_una;
+ tp->retran_data = 0;
+ tp->snd_awnd = 0;
+#endif /* TCP_FACK */
tcp_rcvseqinit(tp);
tp->t_flags |= TF_ACKNOW;
tp->t_state = TCPS_SYN_RECEIVED;
@@ -893,6 +932,16 @@ findpcb:
tp->irs = ti->ti_seq;
tcp_rcvseqinit(tp);
tp->t_flags |= TF_ACKNOW;
+#ifdef TCP_SACK
+ /*
+ * If we've sent a SACK_PERMITTED option, and the peer
+ * also replied with one, then TF_SACK_PERMIT should have
+ * been set in tcp_dooptions(). If it was not, disable SACKs.
+ */
+ if (!tp->sack_disable)
+ if ((tp->t_flags & TF_SACK_PERMIT) == 0)
+ tp->sack_disable = 1;
+#endif
if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
tcpstat.tcps_connects++;
soisconnected(so);
@@ -911,6 +960,15 @@ findpcb:
*/
if (tp->t_rtt)
tcp_xmit_timer(tp, tp->t_rtt);
+ /*
+ * Since new data was acked (the SYN), open the
+ * congestion window by one MSS. We do this
+ * here, because we won't go through the normal
+ * ACK processing below. And since this is the
+ * start of the connection, we know we are in
+ * the exponential phase of slow-start.
+ */
+ tp->snd_cwnd += tp->t_maxseg;
} else
tp->t_state = TCPS_SYN_RECEIVED;
@@ -1169,7 +1227,31 @@ trimthenstep6:
case TCPS_TIME_WAIT:
if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
- if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
+ /*
+ * Duplicate/old ACK processing.
+ * Increments t_dupacks:
+ * Pure duplicate (same seq/ack/window, no data)
+ * Doesn't affect t_dupacks:
+ * Data packets.
+ * Normal window updates (window opens)
+ * Resets t_dupacks:
+ * New data ACKed.
+ * Window shrinks
+ * Old ACK
+ */
+ if (ti->ti_len)
+ break;
+ /*
+ * If we get an old ACK, there is probably packet
+ * reordering going on. Be conservative and reset
+			 * t_dupacks so that we are less aggressive in
+ * doing a fast retransmit.
+ */
+ if (ti->ti_ack != tp->snd_una) {
+ tp->t_dupacks = 0;
+ break;
+ }
+ if (tiwin == tp->snd_wnd) {
tcpstat.tcps_rcvdupack++;
/*
* If we have outstanding data (other than
@@ -1195,45 +1277,186 @@ trimthenstep6:
* to keep a constant cwnd packets in the
* network.
*/
- if (tp->t_timer[TCPT_REXMT] == 0 ||
- ti->ti_ack != tp->snd_una)
+ if (tp->t_timer[TCPT_REXMT] == 0)
tp->t_dupacks = 0;
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ /*
+ * In FACK, can enter fast rec. if the receiver
+ * reports a reass. queue longer than 3 segs.
+ */
+ else if (++tp->t_dupacks == tcprexmtthresh ||
+ ((SEQ_GT(tp->snd_fack, tcprexmtthresh *
+ tp->t_maxseg + tp->snd_una)) &&
+ SEQ_GT(tp->snd_una, tp->snd_last))) {
+#else
else if (++tp->t_dupacks == tcprexmtthresh) {
+#endif /* TCP_FACK */
tcp_seq onxt = tp->snd_nxt;
u_int win =
min(tp->snd_wnd, tp->snd_cwnd) / 2 /
tp->t_maxseg;
+#if defined(TCP_SACK) || defined(TCP_NEWRENO)
+ if (SEQ_LT(ti->ti_ack, tp->snd_last)){
+ /*
+ * False fast retx after
+ * timeout. Do not cut window.
+ */
+ tp->snd_cwnd += tp->t_maxseg;
+ tp->t_dupacks = 0;
+ (void) tcp_output(tp);
+ goto drop;
+ }
+#endif
if (win < 2)
win = 2;
tp->snd_ssthresh = win * tp->t_maxseg;
+#if defined(TCP_SACK) || defined(TCP_NEWRENO)
+ tp->snd_last = tp->snd_max;
+#endif
+#ifdef TCP_SACK
+ if (!tp->sack_disable) {
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->t_rtt = 0;
+ tcpstat.tcps_sndrexmitfast++;
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ (void) tcp_output(tp);
+ /*
+ * During FR, snd_cwnd is held
+ * constant for FACK.
+ */
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->t_dupacks = tcprexmtthresh;
+#else
+ /*
+ * tcp_output() will send
+ * oldest SACK-eligible rtx.
+ */
+ (void) tcp_output(tp);
+ tp->snd_cwnd = tp->snd_ssthresh+
+ tp->t_maxseg * tp->t_dupacks;
+#endif /* TCP_FACK */
+ /*
+ * It is possible for
+ * tcp_output to fail to send
+ * a segment. If so, make
+				 * sure that REXMT timer is set.
+ */
+ if (SEQ_GT(tp->snd_max,
+ tp->snd_una) &&
+ tp->t_timer[TCPT_REXMT] == 0 &&
+ tp->t_timer[TCPT_PERSIST] == 0)
+ tp->t_timer[TCPT_REXMT] =
+ tp->t_rxtcur;
+ goto drop;
+ }
+#endif /* TCP_SACK */
tp->t_timer[TCPT_REXMT] = 0;
tp->t_rtt = 0;
tp->snd_nxt = ti->ti_ack;
tp->snd_cwnd = tp->t_maxseg;
+ tcpstat.tcps_sndrexmitfast++;
(void) tcp_output(tp);
+
tp->snd_cwnd = tp->snd_ssthresh +
tp->t_maxseg * tp->t_dupacks;
if (SEQ_GT(onxt, tp->snd_nxt))
tp->snd_nxt = onxt;
goto drop;
} else if (tp->t_dupacks > tcprexmtthresh) {
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ /*
+ * while (awnd < cwnd)
+ * sendsomething();
+ */
+ if (!tp->sack_disable) {
+ if (tp->snd_awnd < tp->snd_cwnd)
+ tcp_output(tp);
+ goto drop;
+ }
+#endif /* TCP_FACK */
tp->snd_cwnd += tp->t_maxseg;
(void) tcp_output(tp);
goto drop;
}
- } else
+ } else if (tiwin < tp->snd_wnd) {
+ /*
+ * The window was retracted! Previous dup
+ * ACKs may have been due to packets arriving
+ * after the shrunken window, not a missing
+ * packet, so play it safe and reset t_dupacks
+ */
tp->t_dupacks = 0;
+ }
break;
}
/*
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.
*/
+#ifdef TCP_NEWRENO
+ if (tp->t_dupacks >= tcprexmtthresh && !tcp_newreno(tp, ti)) {
+ /* Out of fast recovery */
+ tp->snd_cwnd = tp->snd_ssthresh;
+ /*
+ * Window inflation should have left us with approx.
+ * snd_ssthresh outstanding data. But in case we
+ * would be inclined to send a burst, better to do
+ * it via the slow start mechanism.
+ */
+ if (tcp_seq_subtract(tp->snd_max, ti->ti_ack) <
+ tp->snd_ssthresh)
+ tp->snd_cwnd = tcp_seq_subtract(tp->snd_max,
+ ti->ti_ack) + tp->t_maxseg;
+ tp->t_dupacks = 0;
+ }
+#elif defined(TCP_SACK)
+ if (!tp->sack_disable) {
+ if (tp->t_dupacks >= tcprexmtthresh) {
+ /* Check for a partial ACK */
+ if (tcp_sack_partialack(tp, ti)) {
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ /* Force call to tcp_output */
+ if (tp->snd_awnd < tp->snd_cwnd)
+ needoutput = 1;
+#else
+ tp->snd_cwnd += tp->t_maxseg;
+ needoutput = 1;
+#endif /* TCP_FACK */
+ } else {
+ /* Out of fast recovery */
+ tp->snd_cwnd = tp->snd_ssthresh;
+ if (tcp_seq_subtract(tp->snd_max,
+ ti->ti_ack) < tp->snd_ssthresh)
+ tp->snd_cwnd =
+ tcp_seq_subtract(tp->snd_max,
+ ti->ti_ack) + tp->t_maxseg;
+ tp->t_dupacks = 0;
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ if (SEQ_GT(ti->ti_ack, tp->snd_fack))
+ tp->snd_fack = ti->ti_ack;
+#endif /* TCP_FACK */
+ }
+ }
+ } else {
+ if (tp->t_dupacks >= tcprexmtthresh &&
+ !tcp_newreno(tp, ti)) {
+ /* Out of fast recovery */
+ tp->snd_cwnd = tp->snd_ssthresh;
+ if (tcp_seq_subtract(tp->snd_max, ti->ti_ack) <
+ tp->snd_ssthresh)
+ tp->snd_cwnd =
+ tcp_seq_subtract(tp->snd_max,
+ ti->ti_ack) + tp->t_maxseg;
+ tp->t_dupacks = 0;
+ }
+ }
+#else /* else neither TCP_NEWRENO nor TCP_SACK */
if (tp->t_dupacks >= tcprexmtthresh &&
tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_dupacks = 0;
+#endif
if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
tcpstat.tcps_rcvacktoomuch++;
goto dropafterack;
@@ -1272,9 +1495,7 @@ trimthenstep6:
* If the window gives us less than ssthresh packets
* in flight, open exponentially (maxseg per packet).
* Otherwise open linearly: maxseg per window
- * (maxseg^2 / cwnd per packet), plus a constant
- * fraction of a packet (maxseg/8) to help larger windows
- * open quickly enough.
+ * (maxseg^2 / cwnd per packet).
*/
{
register u_int cw = tp->snd_cwnd;
@@ -1282,6 +1503,9 @@ trimthenstep6:
if (cw > tp->snd_ssthresh)
incr = incr * incr / cw;
+#if defined (TCP_NEWRENO) || defined (TCP_SACK)
+ if (SEQ_GEQ(ti->ti_ack, tp->snd_last))
+#endif
tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
}
if (acked > so->so_snd.sb_cc) {
@@ -1298,6 +1522,10 @@ trimthenstep6:
tp->snd_una = ti->ti_ack;
if (SEQ_LT(tp->snd_nxt, tp->snd_una))
tp->snd_nxt = tp->snd_una;
+#if defined (TCP_SACK) && defined (TCP_FACK)
+ if (SEQ_GT(tp->snd_una, tp->snd_fack))
+ tp->snd_fack = tp->snd_una;
+#endif
switch (tp->t_state) {
@@ -1454,6 +1682,10 @@ dodata: /* XXX */
if ((ti->ti_len || (tiflags & TH_FIN)) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
TCP_REASS(tp, ti, m, so, tiflags);
+#ifdef TCP_SACK
+ if (!tp->sack_disable)
+ tcp_update_sack_list(tp);
+#endif
/*
* Note the amount of data that peer has sent into
* our window, in order to estimate the sender's
@@ -1519,8 +1751,20 @@ dodata: /* XXX */
/*
* Return any desired output.
*/
- if (needoutput || (tp->t_flags & TF_ACKNOW))
+ if (needoutput || (tp->t_flags & TF_ACKNOW)) {
(void) tcp_output(tp);
+#ifdef TCP_SACK
+ /*
+ * In SACK, it is possible for tcp_output() to fail to send a segment
+ * after the retransmission timer has been turned off. Make sure that
+ * the retransmission timer is set if we are in fast recovery.
+ */
+ if (needoutput && SEQ_GT(tp->snd_max, tp->snd_una) &&
+ tp->t_timer[TCPT_REXMT] == 0 &&
+ tp->t_timer[TCPT_PERSIST] == 0)
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+#endif
+ }
return;
dropafterack:
@@ -1636,6 +1880,20 @@ tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr)
tp->ts_recent_age = tcp_now;
}
break;
+
+#ifdef TCP_SACK
+ case TCPOPT_SACK_PERMITTED:
+ if (tp->sack_disable || optlen!=TCPOLEN_SACK_PERMITTED)
+ continue;
+ if (ti->ti_flags & TH_SYN)
+ /* MUST only be set on SYN */
+ tp->t_flags |= TF_SACK_PERMIT;
+ break;
+ case TCPOPT_SACK:
+ if (tcp_sack_option(tp, ti, cp, optlen))
+ continue;
+ break;
+#endif
}
}
/* Update t_maxopd and t_maxseg after all options are processed */
@@ -1643,6 +1901,395 @@ tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr)
(void) tcp_mss(tp, mss); /* sets t_maxseg */
}
+#if defined(TCP_SACK) || defined(TCP_NEWRENO)
+u_long
+tcp_seq_subtract(a, b)
+ u_long a, b;
+{
+ return ((long)(a - b));
+}
+#endif
+
+
+#ifdef TCP_SACK
+/*
+ * This function is called upon receipt of new valid data (while not in header
+ * prediction mode), and it updates the ordered list of sacks.
+ */
+void
+tcp_update_sack_list(tp)
+ struct tcpcb *tp;
+{
+ /*
+ * First reported block MUST be the most recent one. Subsequent
+ * blocks SHOULD be in the order in which they arrived at the
+ * receiver. These two conditions make the implementation fully
+ * compliant with RFC 2018.
+ */
+ int i, j = 0, count = 0, lastpos = -1;
+ struct sackblk sack, firstsack, temp[MAX_SACK_BLKS];
+
+ /* First clean up current list of sacks */
+ for (i = 0; i < tp->rcv_numsacks; i++) {
+ sack = tp->sackblks[i];
+ if (sack.start == 0 && sack.end == 0) {
+ count++; /* count = number of blocks to be discarded */
+ continue;
+ }
+ if (SEQ_LEQ(sack.end, tp->rcv_nxt)) {
+ tp->sackblks[i].start = tp->sackblks[i].end = 0;
+ count++;
+ } else {
+ temp[j].start = tp->sackblks[i].start;
+ temp[j++].end = tp->sackblks[i].end;
+ }
+ }
+ tp->rcv_numsacks -= count;
+ if (tp->rcv_numsacks == 0) { /* no sack blocks currently (fast path) */
+ tcp_clean_sackreport(tp);
+ if (SEQ_LT(tp->rcv_nxt, tp->rcv_laststart)) {
+ /* ==> need first sack block */
+ tp->sackblks[0].start = tp->rcv_laststart;
+ tp->sackblks[0].end = tp->rcv_lastend;
+ tp->rcv_numsacks = 1;
+ }
+ return;
+ }
+ /* Otherwise, sack blocks are already present. */
+ for (i = 0; i < tp->rcv_numsacks; i++)
+ tp->sackblks[i] = temp[i]; /* first copy back sack list */
+ if (SEQ_GEQ(tp->rcv_nxt, tp->rcv_lastend))
+ return; /* sack list remains unchanged */
+ /*
+ * From here, segment just received should be (part of) the 1st sack.
+ * Go through list, possibly coalescing sack block entries.
+ */
+ firstsack.start = tp->rcv_laststart;
+ firstsack.end = tp->rcv_lastend;
+ for (i = 0; i < tp->rcv_numsacks; i++) {
+ sack = tp->sackblks[i];
+ if (SEQ_LT(sack.end, firstsack.start) ||
+ SEQ_GT(sack.start, firstsack.end))
+ continue; /* no overlap */
+ if (sack.start == firstsack.start && sack.end == firstsack.end){
+ /*
+ * identical block; delete it here since we will
+ * move it to the front of the list.
+ */
+ tp->sackblks[i].start = tp->sackblks[i].end = 0;
+ lastpos = i; /* last posn with a zero entry */
+ continue;
+ }
+ if (SEQ_LEQ(sack.start, firstsack.start))
+ firstsack.start = sack.start; /* merge blocks */
+ if (SEQ_GEQ(sack.end, firstsack.end))
+ firstsack.end = sack.end; /* merge blocks */
+ tp->sackblks[i].start = tp->sackblks[i].end = 0;
+ lastpos = i; /* last posn with a zero entry */
+ }
+ if (lastpos != -1) { /* at least one merge */
+ for (i = 0, j = 1; i < tp->rcv_numsacks; i++) {
+ sack = tp->sackblks[i];
+ if (sack.start == 0 && sack.end == 0)
+ continue;
+ temp[j++] = sack;
+ }
+ tp->rcv_numsacks = j; /* including first blk (added later) */
+ for (i = 1; i < tp->rcv_numsacks; i++) /* now copy back */
+ tp->sackblks[i] = temp[i];
+ } else { /* no merges -- shift sacks by 1 */
+ if (tp->rcv_numsacks < MAX_SACK_BLKS)
+ tp->rcv_numsacks++;
+ for (i = tp->rcv_numsacks-1; i > 0; i--)
+ tp->sackblks[i] = tp->sackblks[i-1];
+ }
+ tp->sackblks[0] = firstsack;
+ return;
+}
+
+/*
+ * Process the TCP SACK option. Returns 1 if tcp_dooptions() should continue,
+ * and 0 otherwise, if the option was fine. tp->snd_holes is an ordered list
+ * of holes (oldest to newest, in terms of the sequence space).
+ */
+int
+tcp_sack_option(tp, ti, cp, optlen)
+ struct tcpcb *tp;
+ struct tcpiphdr *ti;
+ u_char *cp;
+ int optlen;
+{
+ int tmp_olen;
+ u_char *tmp_cp;
+ struct sackhole *cur, *p, *temp;
+
+ if (tp->sack_disable)
+ return 1;
+
+ /* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
+ if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
+ return 1;
+ tmp_cp = cp + 2;
+ tmp_olen = optlen - 2;
+ if (tp->snd_numholes < 0)
+ tp->snd_numholes = 0;
+ if (tp->t_maxseg == 0)
+ panic("tcp_sack_option"); /* Should never happen */
+ while (tmp_olen > 0) {
+ struct sackblk sack;
+
+ bcopy((char *) tmp_cp, (char *) &(sack.start), sizeof(tcp_seq));
+ NTOHL(sack.start);
+ bcopy((char *) tmp_cp + sizeof(tcp_seq),
+ (char *) &(sack.end), sizeof(tcp_seq));
+ NTOHL(sack.end);
+ tmp_olen -= TCPOLEN_SACK;
+ tmp_cp += TCPOLEN_SACK;
+ if (SEQ_LEQ(sack.end, sack.start))
+ continue; /* bad SACK fields */
+ if (SEQ_LEQ(sack.end, tp->snd_una))
+ continue; /* old block */
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ /* Updates snd_fack. */
+ if (SEQ_GEQ(sack.end, tp->snd_fack))
+ tp->snd_fack = sack.end;
+#endif /* TCP_FACK */
+ if (tp->snd_holes == 0) { /* first hole */
+ tp->snd_holes = (struct sackhole *)
+ malloc(sizeof(struct sackhole), M_PCB, M_NOWAIT);
+ cur = tp->snd_holes;
+ cur->start = ti->ti_ack;
+ cur->end = sack.start;
+ cur->rxmit = cur->start;
+ cur->next = 0;
+ tp->snd_numholes = 1;
+ tp->rcv_lastsack = sack.end;
+ /*
+ * dups is at least one. If more data has been
+ * SACKed, it can be greater than one.
+ */
+ cur->dups = min(tcprexmtthresh,
+ ((sack.end - cur->end)/tp->t_maxseg));
+ if (cur->dups < 1)
+ cur->dups = 1;
+ continue; /* with next sack block */
+ }
+ /* Go thru list of holes: p = previous, cur = current */
+ p = cur = tp->snd_holes;
+ while (cur) {
+ if (SEQ_LEQ(sack.end, cur->start))
+ /* SACKs data before the current hole */
+ break; /* no use going through more holes */
+ if (SEQ_GEQ(sack.start, cur->end)) {
+ /* SACKs data beyond the current hole */
+ cur->dups++;
+ if ( ((sack.end - cur->end)/tp->t_maxseg) >=
+ tcprexmtthresh)
+ cur->dups = tcprexmtthresh;
+ p = cur;
+ cur = cur->next;
+ continue;
+ }
+ if (SEQ_LEQ(sack.start, cur->start)) {
+ /* Data acks at least the beginning of hole */
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ if (SEQ_GT(sack.end, cur->rxmit))
+ tp->retran_data -=
+ tcp_seq_subtract(cur->rxmit,
+ cur->start);
+ else
+ tp->retran_data -=
+ tcp_seq_subtract(sack.end,
+ cur->start);
+#endif /* TCP_FACK */
+ if (SEQ_GEQ(sack.end,cur->end)){
+ /* Acks entire hole, so delete hole */
+ if (p != cur) {
+ p->next = cur->next;
+ free(cur, M_PCB);
+ cur = p->next;
+ } else {
+ cur=cur->next;
+ free(p, M_PCB);
+ p = cur;
+ tp->snd_holes = p;
+ }
+ tp->snd_numholes--;
+ continue;
+ }
+ /* otherwise, move start of hole forward */
+ cur->start = sack.end;
+ cur->rxmit = max (cur->rxmit, cur->start);
+ p = cur;
+ cur = cur->next;
+ continue;
+ }
+ /* move end of hole backward */
+ if (SEQ_GEQ(sack.end, cur->end)) {
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ if (SEQ_GT(cur->rxmit, sack.start))
+ tp->retran_data -=
+ tcp_seq_subtract(cur->rxmit,
+ sack.start);
+#endif /* TCP_FACK */
+ cur->end = sack.start;
+ cur->rxmit = min (cur->rxmit, cur->end);
+ cur->dups++;
+ if ( ((sack.end - cur->end)/tp->t_maxseg) >=
+ tcprexmtthresh)
+ cur->dups = tcprexmtthresh;
+ p = cur;
+ cur = cur->next;
+ continue;
+ }
+ if (SEQ_LT(cur->start, sack.start) &&
+ SEQ_GT(cur->end, sack.end)) {
+ /*
+ * ACKs some data in middle of a hole; need to
+ * split current hole
+ */
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ if (SEQ_GT(cur->rxmit, sack.end))
+ tp->retran_data -=
+ tcp_seq_subtract(sack.end,
+ sack.start);
+ else if (SEQ_GT(cur->rxmit, sack.start))
+ tp->retran_data -=
+ tcp_seq_subtract(cur->rxmit,
+ sack.start);
+#endif /* TCP_FACK */
+ temp = (struct sackhole *)malloc(sizeof(*temp),
+ M_PCB,M_NOWAIT);
+ temp->next = cur->next;
+ temp->start = sack.end;
+ temp->end = cur->end;
+ temp->dups = cur->dups;
+ temp->rxmit = max (cur->rxmit, temp->start);
+ cur->end = sack.start;
+ cur->rxmit = min (cur->rxmit, cur->end);
+ cur->dups++;
+ if ( ((sack.end - cur->end)/tp->t_maxseg) >=
+ tcprexmtthresh)
+ cur->dups = tcprexmtthresh;
+ cur->next = temp;
+ p = temp;
+ cur = p->next;
+ tp->snd_numholes++;
+ }
+ }
+ /* At this point, p points to the last hole on the list */
+ if (SEQ_LT(tp->rcv_lastsack, sack.start)) {
+ /*
+ * Need to append new hole at end.
+ * Last hole is p (and it's not NULL).
+ */
+ temp = (struct sackhole *) malloc(sizeof(*temp),
+ M_PCB, M_NOWAIT);
+ temp->start = tp->rcv_lastsack;
+ temp->end = sack.start;
+ temp->dups = min(tcprexmtthresh,
+ ((sack.end - sack.start)/tp->t_maxseg));
+ if (temp->dups < 1)
+ temp->dups = 1;
+ temp->rxmit = temp->start;
+ temp->next = 0;
+ p->next = temp;
+ tp->rcv_lastsack = sack.end;
+ tp->snd_numholes++;
+ }
+ }
+#if defined(TCP_SACK) && defined(TCP_FACK)
+ /*
+ * Update retran_data, snd_fack, and snd_awnd. Go through the list of
+ * holes. Increment retran_data by (hole->rxmit - hole->start).
+ * snd_fack gets the highest value of hole->end.
+ */
+ tp->retran_data = 0;
+ cur = tp->snd_holes;
+ while (cur) {
+ tp->retran_data += cur->rxmit - cur->start;
+ cur = cur->next;
+ }
+ tp->snd_awnd = tcp_seq_subtract(tp->snd_nxt, tp->snd_fack) +
+ tp->retran_data;
+#endif /* TCP_FACK */
+
+ return 0;
+}
+
+/*
+ * Delete stale (i.e, cumulatively ack'd) holes. Hole is deleted only if
+ * it is completely acked; otherwise, tcp_sack_option(), called from
+ * tcp_dooptions(), will fix up the hole.
+ */
+void
+tcp_del_sackholes(tp, ti)
+ struct tcpcb *tp;
+ struct tcpiphdr *ti;
+{
+ if (!tp->sack_disable && tp->t_state != TCPS_LISTEN) {
+ /* max because this could be an older ack just arrived */
+ tcp_seq lastack = max(ti->ti_ack, tp->snd_una);
+ struct sackhole *cur = tp->snd_holes;
+ struct sackhole *prev = cur;
+ while (cur)
+ if (SEQ_LEQ(cur->end, lastack)) {
+ cur = cur->next;
+ free(prev, M_PCB);
+ prev = cur;
+ tp->snd_numholes--;
+ } else if (SEQ_LT(cur->start, lastack)) {
+ cur->start = lastack;
+ break;
+ } else
+ break;
+ tp->snd_holes = cur;
+ }
+}
+
+/*
+ * Delete all receiver-side SACK information.
+ */
+void
+tcp_clean_sackreport(tp)
+ struct tcpcb *tp;
+{
+ int i;
+
+ tp->rcv_numsacks = 0;
+ for (i = 0; i < MAX_SACK_BLKS; i++)
+ tp->sackblks[i].start = tp->sackblks[i].end=0;
+
+}
+
+/*
+ * Checks for partial ack. If partial ack arrives, turn off retransmission
+ * timer, deflate the window, do not clear tp->t_dupacks, and return 1.
+ * If the ack advances at least to tp->snd_last, return 0.
+ */
+int
+tcp_sack_partialack(tp, ti)
+ struct tcpcb *tp;
+ struct tcpiphdr *ti;
+{
+ if (SEQ_LT(ti->ti_ack, tp->snd_last)) {
+ /* Turn off retx. timer (will start again next segment) */
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->t_rtt = 0;
+#ifndef TCP_FACK
+ /*
+ * Partial window deflation. This statement relies on the
+ * fact that tp->snd_una has not been updated yet. In FACK
+ * hold snd_cwnd constant during fast recovery.
+ */
+ tp->snd_cwnd -= (ti->ti_ack - tp->snd_una - tp->t_maxseg);
+#endif
+ return 1;
+ }
+ return 0;
+}
+#endif /* TCP_SACK */
+
/*
* Pull out of band byte out of a segment so
* it doesn't appear in the user's data queue.
@@ -1784,7 +2431,6 @@ tcp_mss(tp, offer)
u_long bufsize;
struct inpcb *inp;
struct socket *so;
- extern int tcp_mssdflt;
inp = tp->t_inpcb;
ro = &inp->inp_route;
@@ -1919,3 +2565,40 @@ tcp_mss(tp, offer)
return (mss);
}
#endif /* TUBA_INCLUDE */
+
+#if defined(TCP_NEWRENO) || defined (TCP_SACK)
+/*
+ * Checks for partial ack. If partial ack arrives, force the retransmission
+ * of the next unacknowledged segment, do not clear tp->t_dupacks, and return
+ * 1. By setting snd_nxt to ti_ack, this forces retransmission timer to
+ * be started again. If the ack advances at least to tp->snd_last, return 0.
+ */
+int
+tcp_newreno(tp, ti)
+struct tcpcb *tp;
+struct tcpiphdr *ti;
+{
+ if (SEQ_LT(ti->ti_ack, tp->snd_last)) {
+ tcp_seq onxt = tp->snd_nxt;
+ tcp_seq ouna = tp->snd_una; /* snd_una not yet updated */
+ u_long ocwnd = tp->snd_cwnd;
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->t_rtt = 0;
+ tp->snd_nxt = ti->ti_ack;
+ tp->snd_cwnd = tp->t_maxseg;
+ tp->snd_una = ti->ti_ack;
+ (void) tcp_output(tp);
+ tp->snd_cwnd = ocwnd;
+ tp->snd_una = ouna;
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ /*
+ * Partial window deflation. Relies on fact that tp->snd_una
+ * not updated yet.
+ */
+ tp->snd_cwnd -= (ti->ti_ack - tp->snd_una - tp->t_maxseg);
+ return 1;
+ }
+ return 0;
+}
+#endif /* TCP_NEWRENO || TCP_SACK */