21 files changed, 314 insertions, 148 deletions
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 485e35c3b28b..3a7052207708 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -61,6 +61,7 @@
 #include <net/tcp_states.h>
 #include <net/route.h>
 #include <linux/atalk.h>
+#include "../core/kmap_skb.h"
 
 struct datalink_proto *ddp_dl, *aarp_dl;
 static const struct proto_ops atalk_dgram_ops;
diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h
new file mode 100644
index 000000000000..283c2b993fb8
--- /dev/null
+++ b/net/core/kmap_skb.h
@@ -0,0 +1,19 @@
+#include <linux/highmem.h>
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+#ifdef CONFIG_HIGHMEM
+	BUG_ON(in_irq());
+
+	local_bh_disable();
+#endif
+	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
+#ifdef CONFIG_HIGHMEM
+	local_bh_enable();
+#endif
+}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a90bc439488e..8e1c385e5ba9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -56,7 +56,6 @@
 #include <linux/cache.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
-#include <linux/highmem.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
@@ -67,6 +66,8 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
+#include "kmap_skb.h"
+
 static kmem_cache_t *skbuff_head_cache __read_mostly;
 static kmem_cache_t *skbuff_fclone_cache __read_mostly;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index ab8fafadb4ba..419c7d3289c7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -111,6 +111,7 @@
 #include <linux/poll.h>
 #include <linux/tcp.h>
 #include <linux/init.h>
+#include <linux/highmem.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index dac89166eb18..80f469887691 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -89,4 +89,37 @@ config IP_DCCP_CCID3_DEBUG
 	    parameter to 0 or 1.
 
 	    If in doubt, say N.
+
+config IP_DCCP_CCID3_RTO
+	  int "Use higher bound for nofeedback timer"
+	  default 100
+	  depends on IP_DCCP_CCID3 && EXPERIMENTAL
+	  ---help---
+	    Use higher lower bound for nofeedback timer expiration.
+
+	    The TFRC nofeedback timer normally expires after the maximum of 4
+	    RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
+	    with a small RTT this can mean a high processing load and reduced
+	    performance, since then the nofeedback timer is triggered very
+	    frequently.
+
+	    This option enables to set a higher lower bound for the nofeedback
+	    value. Values in units of milliseconds can be set here.
+
+	    A value of 0 disables this feature by enforcing the value specified
+	    in RFC 3448. The following values have been suggested as bounds for
+	    experimental use:
+	    	* 16-20ms to match the typical multimedia inter-frame interval
+	    	* 100ms as a reasonable compromise [default]
+	    	* 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
+
+	    The default of 100ms is a compromise between a large value for
+	    efficient DCCP implementations, and a small value to avoid disrupting
+	    the network in times of congestion.
+
+	    The purpose of the nofeedback timer is to slow DCCP down when there
+	    is serious network congestion: experimenting with larger values should
+	    therefore not be performed on WANs.
+
+
 endmenu
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 70ebe705eb75..cf8c07b2704f 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -121,12 +121,15 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
 /*
  * Update X by
  *    If (p > 0)
- *       x_calc = calcX(s, R, p);
+ *       X_calc = calcX(s, R, p);
  *       X = max(min(X_calc, 2 * X_recv), s / t_mbi);
  *    Else
  *       If (now - tld >= R)
  *          X = max(min(2 * X, 2 * X_recv), s / R);
  *          tld = now;
+ *
+ * If X has changed, we also update the scheduled send time t_now,
+ * the inter-packet interval t_ipi, and the delta value.
  */ 
 static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 
@@ -134,8 +137,7 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	const __u32 old_x = hctx->ccid3hctx_x;
 
-	/* To avoid large error in calcX */
-	if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
+	if (hctx->ccid3hctx_p > 0) {
 		hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
 						     hctx->ccid3hctx_rtt,
 						     hctx->ccid3hctx_p);
@@ -223,16 +225,14 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 				       ccid3_tx_state_name(hctx->ccid3hctx_state));
 			/* Halve sending rate */
 
-			/*  If (X_calc > 2 * X_recv)
+			/*  If (p == 0 || X_calc > 2 * X_recv)
 			 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
 			 *  Else
 			 *    X_recv = X_calc / 4;
 			 */
-			BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P &&
-			       hctx->ccid3hctx_x_calc == 0);
+			BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
 
-			/* check also if p is zero -> x_calc is infinity? */
-			if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
+			if (hctx->ccid3hctx_p  == 0 ||
 			    hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
 				hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
 								    hctx->ccid3hctx_s / (2 * TFRC_T_MBI));
@@ -245,9 +245,10 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		}
 		/*
 		 * Schedule no feedback timer to expire in
-		 * max(4 * R, 2 * s/X)  =  max(4 * R, 2 * t_ipi)
+		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
+		 * See comments in packet_recv() regarding the value of t_RTO.
 		 */
-		t_nfb = max(4 * hctx->ccid3hctx_rtt, 2 * hctx->ccid3hctx_t_ipi);
+		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
 		break;
 	case TFRC_SSTATE_NO_SENT:
 		DCCP_BUG("Illegal %s state NO_SENT, sk=%p", dccp_role(sk), sk);
@@ -338,7 +339,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		 * else
 		 *       // send the packet in (t_nom - t_now) milliseconds.
 		 */
-		if (delay >= hctx->ccid3hctx_delta)
+		if (delay - (long)hctx->ccid3hctx_delta >= 0)
 			return delay / 1000L;
 		break;
 	case TFRC_SSTATE_TERM:
@@ -412,10 +413,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	struct dccp_tx_hist_entry *packet;
 	struct timeval now;
 	unsigned long t_nfb;
-	u32 t_elapsed;
 	u32 pinv;
-	u32 x_recv;
-	u32 r_sample;
+	long r_sample, t_elapsed;
 
 	BUG_ON(hctx == NULL);
 
@@ -426,31 +425,44 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 
 	opt_recv = &hctx->ccid3hctx_options_received;
 
-	t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
-	x_recv = opt_recv->ccid3or_receive_rate;
-	pinv = opt_recv->ccid3or_loss_event_rate;
-
 	switch (hctx->ccid3hctx_state) {
 	case TFRC_SSTATE_NO_FBACK:
 	case TFRC_SSTATE_FBACK:
-		/* Calculate new round trip sample by
-		 * R_sample = (now - t_recvdata) - t_delay */
-		/* get t_recvdata from history */
+		/* get packet from history to look up t_recvdata */
 		packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
 						 DCCP_SKB_CB(skb)->dccpd_ack_seq);
 		if (unlikely(packet == NULL)) {
-			DCCP_WARN("%s, sk=%p, seqno %llu(%s) does't exist "
+			DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist "
 				  "in history!\n",  dccp_role(sk), sk,
 			    (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
 				  dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
 			return;
 		}
 
-		/* Update RTT */
+		/* Update receive rate */
+		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
+
+		/* Update loss event rate */
+		pinv = opt_recv->ccid3or_loss_event_rate;
+		if (pinv == ~0U || pinv == 0)
+			hctx->ccid3hctx_p = 0;
+		else
+ 			hctx->ccid3hctx_p = 1000000 / pinv;
+
 		dccp_timestamp(sk, &now);
-		r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
-		if (unlikely(r_sample <= t_elapsed))
-			DCCP_WARN("r_sample=%uus,t_elapsed=%uus\n",
+
+		/*
+		 * Calculate new round trip sample as per [RFC 3448, 4.3] by
+		 * 	R_sample  =  (now - t_recvdata) - t_elapsed
+		 */
+		r_sample  = timeval_delta(&now, &packet->dccphtx_tstamp);
+		t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
+
+		if (unlikely(r_sample <= 0)) {
+			DCCP_WARN("WARNING: R_sample (%ld) <= 0!\n", r_sample);
+			r_sample = 0;
+		} else if (unlikely(r_sample <= t_elapsed))
+			DCCP_WARN("WARNING: r_sample=%ldus <= t_elapsed=%ldus\n",
 				  r_sample, t_elapsed);
 		else
 			r_sample -= t_elapsed;
@@ -473,31 +485,25 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			hctx->ccid3hctx_t_ld = now;
 
 			ccid3_update_send_time(hctx);
-			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
-		} else {
-			hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 +
-					      r_sample / 10;
-			ccid3_hc_tx_update_x(sk, &now);
-		}
 
-		ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, "
-			       "r_sample=%us\n", dccp_role(sk), sk,
-			       hctx->ccid3hctx_rtt, r_sample);
+			ccid3_pr_debug("%s(%p), s=%u, w_init=%u, "
+				       "R_sample=%ldus, X=%u\n", dccp_role(sk),
+				       sk, hctx->ccid3hctx_s, w_init, r_sample,
+				       hctx->ccid3hctx_x);
 
-		/* Update receive rate */
-		hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */
+			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
+		} else {
+			hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
+					           (u32)r_sample        ) / 10;
 
-		/* Update loss event rate */
-		if (pinv == ~0 || pinv == 0)
-			hctx->ccid3hctx_p = 0;
-		else {
-			hctx->ccid3hctx_p = 1000000 / pinv;
+			ccid3_hc_tx_update_x(sk, &now);
 
-			if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
-				hctx->ccid3hctx_p = TFRC_SMALLEST_P;
-				ccid3_pr_debug("%s, sk=%p, Smallest p used!\n",
-					       dccp_role(sk), sk);
-			}
+			ccid3_pr_debug("%s(%p), RTT=%uus (sample=%ldus), s=%u, "
+				       "p=%u, X_calc=%u, X=%u\n", dccp_role(sk),
+				       sk, hctx->ccid3hctx_rtt, r_sample,
+				       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
+				       hctx->ccid3hctx_x_calc,
+				       hctx->ccid3hctx_x);
 		}
 
 		/* unschedule no feedback timer */
@@ -512,16 +518,20 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		 */
 		sk->sk_write_space(sk);
 
-		/* Update timeout interval. We use the alternative variant of
-		 * [RFC 3448, 3.1] which sets the upper bound of t_rto to one
-		 * second, as it is suggested for TCP (see RFC 2988, 2.4). */
+		/*
+		 * Update timeout interval for the nofeedback timer.
+		 * We use a configuration option to increase the lower bound.
+		 * This can help avoid triggering the nofeedback timer too often
+		 * ('spinning') on LANs with small RTTs.
+		 */
 		hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
-					      	   USEC_PER_SEC            );
+						   CONFIG_IP_DCCP_CCID3_RTO *
+						   (USEC_PER_SEC/1000)	     );
 		/*
 		 * Schedule no feedback timer to expire in
-		 * max(4 * R, 2 * s/X)  =  max(4 * R, 2 * t_ipi)
+		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
 		 */
-		t_nfb = max(4 * hctx->ccid3hctx_rtt, 2 * hctx->ccid3hctx_t_ipi);
+		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
 			
 		ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to "
 			       "expire in %lu jiffies (%luus)\n",
@@ -535,7 +545,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		hctx->ccid3hctx_idle = 1;   
 		break;
 	case TFRC_SSTATE_NO_SENT:
-		DCCP_WARN("Illegal ACK received - no packet has been sent\n");
+		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_CLIENT)
+			DCCP_WARN("Illegal ACK received - no packet sent\n");
 		/* fall through */
 	case TFRC_SSTATE_TERM:		/* ignore feedback when closing */
 		break;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 27cb20ae1da8..07596d704ef9 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -51,8 +51,6 @@
 /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
 #define TFRC_T_MBI		   64
 
-#define TFRC_SMALLEST_P		   40
-
 enum ccid3_options {
 	TFRC_OPT_LOSS_EVENT_RATE = 192,
 	TFRC_OPT_LOSS_INTERVALS	 = 193,
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 2601012383fb..ddac2c511e2f 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -18,10 +18,79 @@
 #include "tfrc.h"
 
 #define TFRC_CALC_X_ARRSIZE 500
+#define TFRC_CALC_X_SPLIT   50000	/* 0.05 * 1000000, details below */
+#define TFRC_SMALLEST_P	    (TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE)
 
-#define TFRC_CALC_X_SPLIT 50000
-/* equivalent to 0.05 */
-
+/*
+  TFRC TCP Reno Throughput Equation Lookup Table for f(p)
+
+  The following two-column lookup table implements a part of the TCP throughput
+  equation from [RFC 3448, sec. 3.1]:
+
+	 			     s
+  X_calc  =  --------------------------------------------------------------
+	     R * sqrt(2*b*p/3) + (3 * t_RTO * sqrt(3*b*p/8) * (p + 32*p^3))
+
+  Where:
+	X      is the transmit rate in bytes/second
+	s      is the packet size in bytes
+	R      is the round trip time in seconds
+	p      is the loss event rate, between 0 and 1.0, of the number of loss
+	              events as a fraction of the number of packets transmitted
+	t_RTO  is the TCP retransmission timeout value in seconds
+	b      is the number of packets acknowledged by a single TCP ACK
+
+  We can assume that b = 1 and t_RTO is 4 * R. The equation now becomes:
+
+				     s
+  X_calc  =  -------------------------------------------------------
+	     R * sqrt(p*2/3) + (12 * R * sqrt(p*3/8) * (p + 32*p^3))
+
+  which we can break down into:
+
+                      s
+	X_calc  =  ---------
+	            R * f(p)
+
+  where f(p) is given for 0 < p <= 1 by:
+
+	f(p)  =  sqrt(2*p/3) + 12 * sqrt(3*p/8) *  (p + 32*p^3)
+
+  Since this is kernel code, floating-point arithmetic is avoided in favour of
+  integer arithmetic. This means that nearly all fractional parameters are
+  scaled by 1000000:
+    * the parameters p and R
+    * the return result f(p)
+  The lookup table therefore actually tabulates the following function g(q):
+
+  	g(q)  =  1000000 * f(q/1000000)
+
+  Hence, when p <= 1, q must be less than or equal to 1000000. To achieve finer
+  granularity for the practically more relevant case of small values of p (up to
+  5%), the second column is used; the first one ranges up to 100%.  This split
+  corresponds to the value of q = TFRC_CALC_X_SPLIT. At the same time this also
+  determines the smallest resolution possible with this lookup table:
+
+    TFRC_SMALLEST_P   =  TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE
+
+  The entire table is generated by:
+    for(i=0; i < TFRC_CALC_X_ARRSIZE; i++) {
+	lookup[i][0]  =  g((i+1) * 1000000/TFRC_CALC_X_ARRSIZE);
+	lookup[i][1]  =  g((i+1) * TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE);
+    }
+
+  With the given configuration, we have, with M = TFRC_CALC_X_ARRSIZE-1,
+    lookup[0][0]  =  g(1000000/(M+1)) 		 =  1000000 * f(0.2%)
+    lookup[M][0]  =  g(1000000)			 =  1000000 * f(100%)
+    lookup[0][1]  =  g(TFRC_SMALLEST_P)		  = 1000000 * f(0.01%)
+    lookup[M][1]  =  g(TFRC_CALC_X_SPLIT)	 =  1000000 * f(5%)
+
+  In summary, the two columns represent f(p) for the following ranges:
+    * The first column is for   0.002  <= p <= 1.0
+    * The second column is for  0.0001 <= p <= 0.05
+  Where the columns overlap, the second (finer-grained) is given preference,
+  i.e. the first column is used only for p >= 0.05.
+ */
 static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
 	{     37172,   8172 },
 	{     53499,  11567 },
@@ -525,85 +594,69 @@ static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
 	{ 243315981, 271305 }
 };
 
-/* Calculate the send rate as per section 3.1 of RFC3448
- 
-Returns send rate in bytes per second
-
-Integer maths and lookups are used as not allowed floating point in kernel
-
-The function for Xcalc as per section 3.1 of RFC3448 is:
-
-X =                            s
-     -------------------------------------------------------------
-     R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
-
-where 
-X is the trasmit rate in bytes/second
-s is the packet size in bytes
-R is the round trip time in seconds
-p is the loss event rate, between 0 and 1.0, of the number of loss events 
-  as a fraction of the number of packets transmitted
-t_RTO is the TCP retransmission timeout value in seconds
-b is the number of packets acknowledged by a single TCP acknowledgement
-
-we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes:
-
-X =                            s
-     -----------------------------------------------------------------------
-     R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
-
-
-which we can break down into:
-
-X =     s
-     --------
-     R * f(p)
-
-where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
-
-Function parameters:
-s - bytes
-R - RTT in usecs
-p - loss rate (decimal fraction multiplied by 1,000,000)
-
-Returns Xcalc in bytes per second
-
-DON'T alter this code unless you run test cases against it as the code
-has been manipulated to stop underflow/overlow.
+/* return largest index i such that fval <= lookup[i][small] */
+static inline u32 tfrc_binsearch(u32 fval, u8 small)
+{
+	u32 try, low = 0, high = TFRC_CALC_X_ARRSIZE - 1;
+
+	while (low < high) {
+		try = (low + high) / 2;
+		if (fval <= tfrc_calc_x_lookup[try][small])
+			high = try;
+		else
+			low  = try + 1;
+	}
+	return high;
+}
 
-*/
+/**
+ * tfrc_calc_x - Calculate the send rate as per section 3.1 of RFC3448
+ *
+ *  @s: packet size          in bytes
+ *  @R: RTT                  scaled by 1000000   (i.e., microseconds)
+ *  @p: loss ratio estimate  scaled by 1000000
+ *  Returns X_calc           in bytes per second (not scaled).
+ *
+ * Note: DO NOT alter this code unless you run test cases against it,
+ *       as the code has been optimized to stop underflow/overflow.
+ */
 u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 {
 	int index;
 	u32 f;
 	u64 tmp1, tmp2;
 
-	if (p < TFRC_CALC_X_SPLIT)
-		index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1;
-	else
-		index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1;
+	/* check against invalid parameters and divide-by-zero   */
+	BUG_ON(p >  1000000);		/* p must not exceed 100%   */
+	BUG_ON(p == 0);			/* f(0) = 0, divide by zero */
+	if (R == 0) {			/* possible  divide by zero */
+		DCCP_CRIT("WARNING: RTT is 0, returning maximum X_calc.");
+		return ~0U;
+ 	}
 
-	if (index < 0)
-		/* p should be 0 unless there is a bug in my code */
-		index = 0;
+	if (p <= TFRC_CALC_X_SPLIT) 		{     /* 0.0000 < p <= 0.05   */
+		if (p < TFRC_SMALLEST_P) {	      /* 0.0000 < p <  0.0001 */
+			DCCP_WARN("Value of p (%d) below resolution. "
+				  "Substituting %d\n", p, TFRC_SMALLEST_P);
+			index = 0;
+		} else 				      /* 0.0001 <= p <= 0.05  */
+			index =  p/TFRC_SMALLEST_P - 1;
 
-	if (R == 0) {
-		DCCP_WARN("RTT==0, setting to 1\n");
-		R = 1; /* RTT can't be zero or else divide by zero */
-	}
+ 		f = tfrc_calc_x_lookup[index][1];
 
-	BUG_ON(index >= TFRC_CALC_X_ARRSIZE);
+	} else {	 			      /* 0.05   <  p <= 1.00  */
+		index = p/(1000000/TFRC_CALC_X_ARRSIZE) - 1;
 
-	if (p >= TFRC_CALC_X_SPLIT)
 		f = tfrc_calc_x_lookup[index][0];
-	else
-		f = tfrc_calc_x_lookup[index][1];
+	}
 
+	/* The following computes X = s/(R*f(p)) in bytes per second. Since f(p)
+	 * and R are both scaled by 1000000, we need to multiply by 1000000^2.
+	 * ==> DO NOT alter this unless you test against overflow on 32 bit   */
 	tmp1 = ((u64)s * 100000000);
 	tmp2 = ((u64)R * (u64)f);
 	do_div(tmp2, 10000);
 	do_div(tmp1, tmp2); 
-	/* Don't alter above math unless you test due to overflow on 32 bit */
 
 	return (u32)tmp1; 
 }
@@ -611,33 +664,36 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 EXPORT_SYMBOL_GPL(tfrc_calc_x);
 
 /*
- * args: fvalue - function value to match
- * returns: p closest to that value
+ *  tfrc_calc_x_reverse_lookup  -  try to find p given f(p)
  *
- * both fvalue and p are multiplied by 1,000,000 to use ints
+ *  @fvalue: function value to match, scaled by 1000000
+ *  Returns closest match for p, also scaled by 1000000
  */
 u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
 {
-	int ctr = 0;
-	int small;
+	int index;
 
-	if (fvalue < tfrc_calc_x_lookup[0][1])
+	if (fvalue == 0)	/* f(p) = 0  whenever  p = 0 */
 		return 0;
 
-	if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1])
-		small = 1;
-	else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0])
+	/* Error cases. */
+	if (fvalue < tfrc_calc_x_lookup[0][1]) {
+		DCCP_WARN("fvalue %d smaller than resolution\n", fvalue);
+		return tfrc_calc_x_lookup[0][1];
+	}
+	if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) {
+		DCCP_WARN("fvalue %d exceeds bounds!\n", fvalue);
 		return 1000000;
-	else
-		small = 0;
-
-	while (fvalue > tfrc_calc_x_lookup[ctr][small])
-		ctr++;
+	}
 
-	if (small)
-		return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE;
-	else
-		return 1000000 * ctr / TFRC_CALC_X_ARRSIZE;
+	if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) {
+		index = tfrc_binsearch(fvalue, 1);
+		return (index + 1) * TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE;
+ 	}
+ 
+	/* else ... it must be in the coarse-grained column */
+	index = tfrc_binsearch(fvalue, 0);
+	return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
 }
 
 EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 4200ec509866..fc1f99a59732 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -16,6 +16,7 @@
 #include <linux/random.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
+#include <linux/mm.h>
 #include <linux/if_ether.h>
 #include <linux/if_arp.h>
 #include <asm/string.h>
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index 1b2efff11d39..7a95c3d81314 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/skbuff.h>
+#include <linux/mm.h>
 #include <asm/string.h>
 
 #include <net/ieee80211.h>
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1da3d32f8289..a35209d517ad 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -53,6 +53,7 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/highmem.h>
 
 #include <linux/socket.h>
 #include <linux/sockios.h>
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 524751e031de..a4385a2180ee 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -45,6 +45,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
+#include <linux/jiffies.h>
 
 /* for sysctl */
 #include <linux/fs.h>
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 08990192b6ec..fe1af5d079af 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -43,6 +43,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
+#include <linux/jiffies.h>
 
 /* for sysctl */
 #include <linux/fs.h>
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
index 3fefc822c1c0..89fd2a2cbca6 100644
--- a/net/irda/discovery.c
+++ b/net/irda/discovery.c
@@ -32,6 +32,7 @@
 
 #include <linux/string.h>
 #include <linux/socket.h>
+#include <linux/fs.h>
 #include <linux/seq_file.h>
 
 #include <net/irda/irda.h>
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 8cfd076c4c12..8f1c6d65b247 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/skbuff.h>
+#include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/seq_file.h>
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 9c446a72ff1f..252f11012566 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -26,6 +26,7 @@
 
 #include <linux/skbuff.h>
 #include <linux/init.h>
+#include <linux/fs.h>
 #include <linux/seq_file.h>
 
 #include <asm/byteorder.h>
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 93d97d9f9da8..eaa0f8a1adb6 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -46,6 +46,7 @@
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/socket.h>
+#include <linux/mm.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 58522fc65d33..8996584b8499 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -21,6 +21,7 @@
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <linux/mutex.h>
+#include <linux/mm.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp.h>
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 501c564e247f..a98de0b54d65 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -17,6 +17,7 @@
 #include <linux/seq_file.h>
 #include <linux/list.h>
 #include <linux/skbuff.h>
+#include <linux/mm.h>
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 271d2eed0699..08e68b67bbf6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -71,6 +71,7 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 #include <asm/page.h>
+#include <asm/cacheflush.h>
 #include <asm/io.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6f97665983d2..311205ffa775 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -858,7 +858,6 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
 	int i;
 
 	xp->xfrm_nr = nr;
-	xp->family = ut->family;
 	for (i = 0; i < nr; i++, ut++) {
 		struct xfrm_tmpl *t = &xp->xfrm_vec[i];
 
@@ -876,19 +875,53 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
 	}
 }
 
+static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
+{
+	int i;
+
+	if (nr > XFRM_MAX_DEPTH)
+		return -EINVAL;
+
+	for (i = 0; i < nr; i++) {
+		/* We never validated the ut->family value, so many
+		 * applications simply leave it at zero.  The check was
+		 * never made and ut->family was ignored because all
+		 * templates could be assumed to have the same family as
+		 * the policy itself.  Now that we will have ipv4-in-ipv6
+		 * and ipv6-in-ipv4 tunnels, this is no longer true.
+		 */
+		if (!ut[i].family)
+			ut[i].family = family;
+
+		switch (ut[i].family) {
+		case AF_INET:
+			break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		case AF_INET6:
+			break;
+#endif
+		default:
+			return -EINVAL;
+		};
+	}
+
+	return 0;
+}
+
 static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
 {
 	struct rtattr *rt = xfrma[XFRMA_TMPL-1];
-	struct xfrm_user_tmpl *utmpl;
-	int nr;
 
 	if (!rt) {
 		pol->xfrm_nr = 0;
 	} else {
-		nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl);
+		struct xfrm_user_tmpl *utmpl = RTA_DATA(rt);
+		int nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl);
+		int err;
 
-		if (nr > XFRM_MAX_DEPTH)
-			return -EINVAL;
+		err = validate_tmpl(nr, utmpl, pol->family);
+		if (err)
+			return err;
 
 		copy_templates(pol, RTA_DATA(rt), nr);
 	}
@@ -1530,7 +1563,8 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xf
 	}
 
 	/*   build an XP */
-	xp = xfrm_policy_construct(&ua->policy, (struct rtattr **) xfrma, &err);        if (!xp) {
+	xp = xfrm_policy_construct(&ua->policy, (struct rtattr **) xfrma, &err);
+	if (!xp) {
 		kfree(x);
 		return err;
 	}
@@ -1979,7 +2013,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
 		return NULL;
 
 	nr = ((len - sizeof(*p)) / sizeof(*ut));
-	if (nr > XFRM_MAX_DEPTH)
+	if (validate_tmpl(nr, ut, p->sel.family))
 		return NULL;
 
 	if (p->dir > XFRM_POLICY_OUT)