aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/cipso_ipv4.c7
-rw-r--r--net/ipv4/ip_input.c10
-rw-r--r--net/ipv4/ipmr.c9
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c2
-rw-r--r--net/ipv4/proc.c58
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_htcp.c14
-rw-r--r--net/ipv4/tcp_output.c15
-rw-r--r--net/ipv4/tcp_vegas.c82
-rw-r--r--net/ipv4/udp.c13
-rw-r--r--net/ipv4/xfrm4_state.c1
12 files changed, 90 insertions, 125 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1fbff5fa4241..1aa2dc9e380e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1117,6 +1117,7 @@ int inet_sk_rebuild_header(struct sock *sk)
},
},
.proto = sk->sk_protocol,
+ .flags = inet_sk_flowi_flags(sk),
.uli_u = {
.ports = {
.sport = inet->sport,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 490e035c6d90..2e78f6bd9775 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -2063,9 +2063,10 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb,
u32 opt_len;
int len_delta;
- buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
- if (buf_len < 0)
- return buf_len;
+ ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+ if (ret_val < 0)
+ return ret_val;
+ buf_len = ret_val;
opt_len = (buf_len + 3) & ~3;
/* we overwrite any existing options to ensure that we have enough
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 861978a4f1a8..cfb38ac9d698 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -209,9 +209,17 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
hash = protocol & (MAX_INET_PROTOS - 1);
ipprot = rcu_dereference(inet_protos[hash]);
- if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) {
+ if (ipprot != NULL) {
int ret;
+ if (!net_eq(net, &init_net) && !ipprot->netns_ok) {
+ if (net_ratelimit())
+ printk("%s: proto %d isn't netns-ready\n",
+ __func__, protocol);
+ kfree_skb(skb);
+ goto out;
+ }
+
if (!ipprot->no_policy) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
kfree_skb(skb);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b42e082cc170..25924b1eb2ef 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1945,13 +1945,14 @@ int __init ip_mr_init(void)
goto proc_cache_fail;
#endif
return 0;
-reg_notif_fail:
- kmem_cache_destroy(mrt_cachep);
#ifdef CONFIG_PROC_FS
-proc_vif_fail:
- unregister_netdevice_notifier(&ip_mr_notifier);
proc_cache_fail:
proc_net_remove(&init_net, "ip_mr_vif");
+proc_vif_fail:
+ unregister_netdevice_notifier(&ip_mr_notifier);
#endif
+reg_notif_fail:
+ del_timer(&ipmr_expire_timer);
+ kmem_cache_destroy(mrt_cachep);
return err;
}
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index bea54a685109..8d489e746b21 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -61,7 +61,7 @@ static struct
static struct xt_table nat_table = {
.name = "nat",
.valid_hooks = NAT_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(__nat_table.lock),
+ .lock = __RW_LOCK_UNLOCKED(nat_table.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 8f5a403f6f6b..a631a1f110ca 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -237,43 +237,45 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_SENTINEL
};
+static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals,
+ unsigned short *type, int count)
+{
+ int j;
+
+ if (count) {
+ seq_printf(seq, "\nIcmpMsg:");
+ for (j = 0; j < count; ++j)
+ seq_printf(seq, " %sType%u",
+ type[j] & 0x100 ? "Out" : "In",
+ type[j] & 0xff);
+ seq_printf(seq, "\nIcmpMsg:");
+ for (j = 0; j < count; ++j)
+ seq_printf(seq, " %lu", vals[j]);
+ }
+}
+
static void icmpmsg_put(struct seq_file *seq)
{
#define PERLINE 16
- int j, i, count;
- static int out[PERLINE];
+ int i, count;
+ unsigned short type[PERLINE];
+ unsigned long vals[PERLINE], val;
struct net *net = seq->private;
count = 0;
for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
-
- if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i))
- out[count++] = i;
- if (count < PERLINE)
- continue;
-
- seq_printf(seq, "\nIcmpMsg:");
- for (j = 0; j < PERLINE; ++j)
- seq_printf(seq, " %sType%u", i & 0x100 ? "Out" : "In",
- i & 0xff);
- seq_printf(seq, "\nIcmpMsg: ");
- for (j = 0; j < PERLINE; ++j)
- seq_printf(seq, " %lu",
- snmp_fold_field((void **) net->mib.icmpmsg_statistics,
- out[j]));
- seq_putc(seq, '\n');
- }
- if (count) {
- seq_printf(seq, "\nIcmpMsg:");
- for (j = 0; j < count; ++j)
- seq_printf(seq, " %sType%u", out[j] & 0x100 ? "Out" :
- "In", out[j] & 0xff);
- seq_printf(seq, "\nIcmpMsg:");
- for (j = 0; j < count; ++j)
- seq_printf(seq, " %lu", snmp_fold_field((void **)
- net->mib.icmpmsg_statistics, out[j]));
+ val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i);
+ if (val) {
+ type[count] = i;
+ vals[count++] = val;
+ }
+ if (count == PERLINE) {
+ icmpmsg_put_line(seq, vals, type, count);
+ count = 0;
+ }
}
+ icmpmsg_put_line(seq, vals, type, count);
#undef PERLINE
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eccb7165a80c..c5aca0bb116a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1374,8 +1374,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sk->sk_state == TCP_CLOSE ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
!timeo ||
- signal_pending(current) ||
- (flags & MSG_PEEK))
+ signal_pending(current))
break;
} else {
if (sock_flag(sk, SOCK_DONE))
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index af99776146ff..937549b8a921 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -69,9 +69,12 @@ static u32 htcp_cwnd_undo(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
- ca->last_cong = ca->undo_last_cong;
- ca->maxRTT = ca->undo_maxRTT;
- ca->old_maxB = ca->undo_old_maxB;
+ if (ca->undo_last_cong) {
+ ca->last_cong = ca->undo_last_cong;
+ ca->maxRTT = ca->undo_maxRTT;
+ ca->old_maxB = ca->undo_old_maxB;
+ ca->undo_last_cong = 0;
+ }
return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta);
}
@@ -268,7 +271,10 @@ static void htcp_state(struct sock *sk, u8 new_state)
case TCP_CA_Open:
{
struct htcp *ca = inet_csk_ca(sk);
- ca->last_cong = jiffies;
+ if (ca->undo_last_cong) {
+ ca->last_cong = jiffies;
+ ca->undo_last_cong = 0;
+ }
}
break;
case TCP_CA_CWR:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ba85d8831893..fe3b4bdfd251 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1028,10 +1028,6 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
/* Compute the current effective MSS, taking SACKs and IP options,
* and even PMTU discovery events into account.
- *
- * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up
- * cannot be large. However, taking into account rare use of URG, this
- * is not a big flaw.
*/
unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
{
@@ -1046,7 +1042,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
mss_now = tp->mss_cache;
- if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp))
+ if (large_allowed && sk_can_gso(sk))
doing_tso = 1;
if (dst) {
@@ -1516,6 +1512,10 @@ static int tcp_mtu_probe(struct sock *sk)
* send_head. This happens as incoming acks open up the remote
* window for us.
*
+ * LARGESEND note: !tcp_urg_mode is overkill, only frames between
+ * snd_up-64k-mss .. snd_up cannot be large. However, taking into
+ * account rare use of URG, this is not a big flaw.
+ *
* Returns 1, if no segments are in flight and we have queued segments, but
* cannot send anything now because of SWS or another problem.
*/
@@ -1567,7 +1567,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
}
limit = mss_now;
- if (tso_segs > 1)
+ if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
cwnd_quota);
@@ -1616,6 +1616,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
*/
void tcp_push_one(struct sock *sk, unsigned int mss_now)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb = tcp_send_head(sk);
unsigned int tso_segs, cwnd_quota;
@@ -1630,7 +1631,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
BUG_ON(!tso_segs);
limit = mss_now;
- if (tso_segs > 1)
+ if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
cwnd_quota);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 14504dada116..a453aac91bd3 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -40,18 +40,14 @@
#include "tcp_vegas.h"
-/* Default values of the Vegas variables, in fixed-point representation
- * with V_PARAM_SHIFT bits to the right of the binary point.
- */
-#define V_PARAM_SHIFT 1
-static int alpha = 2<<V_PARAM_SHIFT;
-static int beta = 4<<V_PARAM_SHIFT;
-static int gamma = 1<<V_PARAM_SHIFT;
+static int alpha = 2;
+static int beta = 4;
+static int gamma = 1;
module_param(alpha, int, 0644);
-MODULE_PARM_DESC(alpha, "lower bound of packets in network (scale by 2)");
+MODULE_PARM_DESC(alpha, "lower bound of packets in network");
module_param(beta, int, 0644);
-MODULE_PARM_DESC(beta, "upper bound of packets in network (scale by 2)");
+MODULE_PARM_DESC(beta, "upper bound of packets in network");
module_param(gamma, int, 0644);
MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
@@ -172,49 +168,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
return;
}
- /* The key players are v_beg_snd_una and v_beg_snd_nxt.
- *
- * These are so named because they represent the approximate values
- * of snd_una and snd_nxt at the beginning of the current RTT. More
- * precisely, they represent the amount of data sent during the RTT.
- * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
- * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
- * bytes of data have been ACKed during the course of the RTT, giving
- * an "actual" rate of:
- *
- * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
- *
- * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
- * because delayed ACKs can cover more than one segment, so they
- * don't line up nicely with the boundaries of RTTs.
- *
- * Another unfortunate fact of life is that delayed ACKs delay the
- * advance of the left edge of our send window, so that the number
- * of bytes we send in an RTT is often less than our cwnd will allow.
- * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
- */
-
if (after(ack, vegas->beg_snd_nxt)) {
/* Do the Vegas once-per-RTT cwnd adjustment. */
- u32 old_wnd, old_snd_cwnd;
-
-
- /* Here old_wnd is essentially the window of data that was
- * sent during the previous RTT, and has all
- * been acknowledged in the course of the RTT that ended
- * with the ACK we just received. Likewise, old_snd_cwnd
- * is the cwnd during the previous RTT.
- */
- old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) /
- tp->mss_cache;
- old_snd_cwnd = vegas->beg_snd_cwnd;
/* Save the extent of the current window so we can use this
* at the end of the next RTT.
*/
- vegas->beg_snd_una = vegas->beg_snd_nxt;
vegas->beg_snd_nxt = tp->snd_nxt;
- vegas->beg_snd_cwnd = tp->snd_cwnd;
/* We do the Vegas calculations only if we got enough RTT
* samples that we can be reasonably sure that we got
@@ -252,22 +212,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
*
* This is:
* (actual rate in segments) * baseRTT
- * We keep it as a fixed point number with
- * V_PARAM_SHIFT bits to the right of the binary point.
*/
- target_cwnd = ((u64)old_wnd * vegas->baseRTT);
- target_cwnd <<= V_PARAM_SHIFT;
- do_div(target_cwnd, rtt);
+ target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt;
/* Calculate the difference between the window we had,
* and the window we would like to have. This quantity
* is the "Diff" from the Arizona Vegas papers.
- *
- * Again, this is a fixed point number with
- * V_PARAM_SHIFT bits to the right of the binary
- * point.
*/
- diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
+ diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
if (diff > gamma && tp->snd_ssthresh > 2 ) {
/* Going too fast. Time to slow down
@@ -282,16 +234,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
* truncation robs us of full link
* utilization.
*/
- tp->snd_cwnd = min(tp->snd_cwnd,
- ((u32)target_cwnd >>
- V_PARAM_SHIFT)+1);
+ tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
} else if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* Slow start. */
tcp_slow_start(tp);
} else {
/* Congestion avoidance. */
- u32 next_snd_cwnd;
/* Figure out where we would like cwnd
* to be.
@@ -300,32 +249,25 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
/* The old window was too fast, so
* we slow down.
*/
- next_snd_cwnd = old_snd_cwnd - 1;
+ tp->snd_cwnd--;
} else if (diff < alpha) {
/* We don't have enough extra packets
* in the network, so speed up.
*/
- next_snd_cwnd = old_snd_cwnd + 1;
+ tp->snd_cwnd++;
} else {
/* Sending just as fast as we
* should be.
*/
- next_snd_cwnd = old_snd_cwnd;
}
-
- /* Adjust cwnd upward or downward, toward the
- * desired value.
- */
- if (next_snd_cwnd > tp->snd_cwnd)
- tp->snd_cwnd++;
- else if (next_snd_cwnd < tp->snd_cwnd)
- tp->snd_cwnd--;
}
if (tp->snd_cwnd < 2)
tp->snd_cwnd = 2;
else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
tp->snd_cwnd = tp->snd_cwnd_clamp;
+
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
}
/* Wipe the slate clean for the next RTT. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2095abc3caba..98c1fd09be88 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -284,7 +284,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
-static inline struct sock *udp_v4_mcast_next(struct sock *sk,
+static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
int dif)
@@ -296,7 +296,8 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk,
sk_for_each_from(s, node) {
struct inet_sock *inet = inet_sk(s);
- if (s->sk_hash != hnum ||
+ if (!net_eq(sock_net(s), net) ||
+ s->sk_hash != hnum ||
(inet->daddr && inet->daddr != rmt_addr) ||
(inet->dport != rmt_port && inet->dport) ||
(inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
@@ -632,6 +633,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
.saddr = saddr,
.tos = tos } },
.proto = sk->sk_protocol,
+ .flags = inet_sk_flowi_flags(sk),
.uli_u = { .ports =
{ .sport = inet->sport,
.dport = dport } } };
@@ -1079,15 +1081,16 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
read_lock(&udp_hash_lock);
sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
dif = skb->dev->ifindex;
- sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
+ sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
struct sock *sknext = NULL;
do {
struct sk_buff *skb1 = skb;
- sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
- uh->source, saddr, dif);
+ sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest,
+ daddr, uh->source, saddr,
+ dif);
if (sknext)
skb1 = skb_clone(skb, GFP_ATOMIC);
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 07735ed280d7..55dc6beab9aa 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -33,6 +33,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
x->sel.dport_mask = htons(0xffff);
x->sel.sport = xfrm_flowi_sport(fl);
x->sel.sport_mask = htons(0xffff);
+ x->sel.family = AF_INET;
x->sel.prefixlen_d = 32;
x->sel.prefixlen_s = 32;
x->sel.proto = fl->proto;