aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter/ipvs
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter/ipvs')
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c101
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c182
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c102
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c23
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c15
5 files changed, 263 insertions, 160 deletions
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b87ca32efa0b..5d2b806a862e 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -119,24 +119,24 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@@ -153,24 +153,24 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@@ -183,13 +183,19 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
struct ip_vs_cpu_stats *s;
s = this_cpu_ptr(cp->dest->stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
}
@@ -1046,6 +1052,26 @@ static inline bool is_new_conn(const struct sk_buff *skb,
}
}
+static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
+ int conn_reuse_mode)
+{
+ /* Controlled (FTP DATA or persistence)? */
+ if (cp->control)
+ return false;
+
+ switch (cp->protocol) {
+ case IPPROTO_TCP:
+ return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
+ ((conn_reuse_mode & 2) &&
+ (cp->state == IP_VS_TCP_S_FIN_WAIT) &&
+ (cp->flags & IP_VS_CONN_F_NOOUTPUT));
+ case IPPROTO_SCTP:
+ return cp->state == IP_VS_SCTP_S_CLOSED;
+ default:
+ return false;
+ }
+}
+
/* Handle response packets: rewrite addresses and send away...
*/
static unsigned int
@@ -1246,8 +1272,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
*/
static unsigned int
ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET);
}
@@ -1258,8 +1283,7 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET);
}
@@ -1273,8 +1297,7 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET6);
}
@@ -1285,8 +1308,7 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET6);
}
@@ -1585,6 +1607,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
struct ip_vs_conn *cp;
int ret, pkts;
struct netns_ipvs *ipvs;
+ int conn_reuse_mode;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1653,10 +1676,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
cp = pp->conn_in_get(af, skb, &iph, 0);
- if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest &&
- unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs &&
- is_new_conn(skb, &iph)) {
- ip_vs_conn_expire_now(cp);
+ conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
+ if (conn_reuse_mode && !iph.fragoffs &&
+ is_new_conn(skb, &iph) && cp &&
+ ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight))) ||
+ unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
+ if (!atomic_read(&cp->n_control))
+ ip_vs_conn_expire_now(cp);
__ip_vs_conn_put(cp);
cp = NULL;
}
@@ -1738,9 +1765,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
static unsigned int
ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET);
}
@@ -1751,8 +1776,7 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET);
}
@@ -1765,9 +1789,7 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET6);
}
@@ -1778,8 +1800,7 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET6);
}
@@ -1798,8 +1819,7 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
int r;
struct net *net;
@@ -1820,8 +1840,7 @@ ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
#ifdef CONFIG_IP_VS_IPV6
static unsigned int
ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
int r;
struct net *net;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ed99448671c3..49532672f66d 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -729,9 +729,9 @@ static void ip_vs_trash_cleanup(struct net *net)
}
static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
{
-#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
+#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
spin_lock_bh(&src->lock);
@@ -747,13 +747,28 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
}
static void
+ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
+{
+ dst->conns = (u32)src->conns;
+ dst->inpkts = (u32)src->inpkts;
+ dst->outpkts = (u32)src->outpkts;
+ dst->inbytes = src->inbytes;
+ dst->outbytes = src->outbytes;
+ dst->cps = (u32)src->cps;
+ dst->inpps = (u32)src->inpps;
+ dst->outpps = (u32)src->outpps;
+ dst->inbps = (u32)src->inbps;
+ dst->outbps = (u32)src->outbps;
+}
+
+static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
spin_lock_bh(&stats->lock);
/* get current counters as zero point, rates are zeroed */
-#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
+#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
IP_VS_ZERO_STATS_COUNTER(conns);
IP_VS_ZERO_STATS_COUNTER(inpkts);
@@ -1808,6 +1823,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "conn_reuse_mode",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@@ -2044,7 +2065,7 @@ static const struct file_operations ip_vs_info_fops = {
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
- struct ip_vs_stats_user show;
+ struct ip_vs_kstats show;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
@@ -2053,17 +2074,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
" Conns Packets Packets Bytes Bytes\n");
ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
- seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
- show.inpkts, show.outpkts,
- (unsigned long long) show.inbytes,
- (unsigned long long) show.outbytes);
-
-/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+ seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
+ (unsigned long long)show.conns,
+ (unsigned long long)show.inpkts,
+ (unsigned long long)show.outpkts,
+ (unsigned long long)show.inbytes,
+ (unsigned long long)show.outbytes);
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567*/
seq_puts(seq,
- " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
- seq_printf(seq, "%8X %8X %8X %16X %16X\n",
- show.cps, show.inpps, show.outpps,
- show.inbps, show.outbps);
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
+ (unsigned long long)show.cps,
+ (unsigned long long)show.inpps,
+ (unsigned long long)show.outpps,
+ (unsigned long long)show.inbps,
+ (unsigned long long)show.outbps);
return 0;
}
@@ -2086,7 +2112,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
struct net *net = seq_file_single_net(seq);
struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
- struct ip_vs_stats_user rates;
+ struct ip_vs_kstats kstats;
int i;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
@@ -2098,41 +2124,41 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
unsigned int start;
- __u64 inbytes, outbytes;
+ u64 conns, inpkts, outpkts, inbytes, outbytes;
do {
start = u64_stats_fetch_begin_irq(&u->syncp);
- inbytes = u->ustats.inbytes;
- outbytes = u->ustats.outbytes;
+ conns = u->cnt.conns;
+ inpkts = u->cnt.inpkts;
+ outpkts = u->cnt.outpkts;
+ inbytes = u->cnt.inbytes;
+ outbytes = u->cnt.outbytes;
} while (u64_stats_fetch_retry_irq(&u->syncp, start));
- seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
- i, u->ustats.conns, u->ustats.inpkts,
- u->ustats.outpkts, (__u64)inbytes,
- (__u64)outbytes);
+ seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
+ i, (u64)conns, (u64)inpkts,
+ (u64)outpkts, (u64)inbytes,
+ (u64)outbytes);
}
- spin_lock_bh(&tot_stats->lock);
-
- seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
- tot_stats->ustats.conns, tot_stats->ustats.inpkts,
- tot_stats->ustats.outpkts,
- (unsigned long long) tot_stats->ustats.inbytes,
- (unsigned long long) tot_stats->ustats.outbytes);
-
- ip_vs_read_estimator(&rates, tot_stats);
+ ip_vs_copy_stats(&kstats, tot_stats);
- spin_unlock_bh(&tot_stats->lock);
+ seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n",
+ (unsigned long long)kstats.conns,
+ (unsigned long long)kstats.inpkts,
+ (unsigned long long)kstats.outpkts,
+ (unsigned long long)kstats.inbytes,
+ (unsigned long long)kstats.outbytes);
-/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+/* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
- " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
- seq_printf(seq, " %8X %8X %8X %16X %16X\n",
- rates.cps,
- rates.inpps,
- rates.outpps,
- rates.inbps,
- rates.outbps);
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n",
+ kstats.cps,
+ kstats.inpps,
+ kstats.outpps,
+ kstats.inbps,
+ kstats.outbps);
return 0;
}
@@ -2400,6 +2426,7 @@ static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
struct ip_vs_scheduler *sched;
+ struct ip_vs_kstats kstats;
sched = rcu_dereference_protected(src->scheduler, 1);
dst->protocol = src->protocol;
@@ -2411,7 +2438,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
dst->num_dests = src->num_dests;
- ip_vs_copy_stats(&dst->stats, &src->stats);
+ ip_vs_copy_stats(&kstats, &src->stats);
+ ip_vs_export_stats_user(&dst->stats, &kstats);
}
static inline int
@@ -2485,6 +2513,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
int count = 0;
struct ip_vs_dest *dest;
struct ip_vs_dest_entry entry;
+ struct ip_vs_kstats kstats;
memset(&entry, 0, sizeof(entry));
list_for_each_entry(dest, &svc->destinations, n_list) {
@@ -2506,7 +2535,8 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
entry.activeconns = atomic_read(&dest->activeconns);
entry.inactconns = atomic_read(&dest->inactconns);
entry.persistconns = atomic_read(&dest->persistconns);
- ip_vs_copy_stats(&entry.stats, &dest->stats);
+ ip_vs_copy_stats(&kstats, &dest->stats);
+ ip_vs_export_stats_user(&entry.stats, &kstats);
if (copy_to_user(&uptr->entrytable[count],
&entry, sizeof(entry))) {
ret = -EFAULT;
@@ -2798,25 +2828,51 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
- struct ip_vs_stats *stats)
+ struct ip_vs_kstats *kstats)
{
- struct ip_vs_stats_user ustats;
struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
if (!nl_stats)
return -EMSGSIZE;
- ip_vs_copy_stats(&ustats, stats);
-
- if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
- nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
- nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
+ if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
+ goto nla_put_failure;
+ nla_nest_end(skb, nl_stats);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_stats);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
+ struct ip_vs_kstats *kstats)
+{
+ struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
+ if (!nl_stats)
+ return -EMSGSIZE;
+
+ if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps))
goto nla_put_failure;
nla_nest_end(skb, nl_stats);
@@ -2835,6 +2891,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct nlattr *nl_service;
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
+ struct ip_vs_kstats kstats;
nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
if (!nl_service)
@@ -2860,7 +2917,10 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
goto nla_put_failure;
- if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+ ip_vs_copy_stats(&kstats, &svc->stats);
+ if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
+ goto nla_put_failure;
+ if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
goto nla_put_failure;
nla_nest_end(skb, nl_service);
@@ -3032,6 +3092,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
struct nlattr *nl_dest;
+ struct ip_vs_kstats kstats;
nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
if (!nl_dest)
@@ -3054,7 +3115,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
atomic_read(&dest->persistconns)) ||
nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
goto nla_put_failure;
- if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+ ip_vs_copy_stats(&kstats, &dest->stats);
+ if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
+ goto nla_put_failure;
+ if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
goto nla_put_failure;
nla_nest_end(skb, nl_dest);
@@ -3732,6 +3796,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
ipvs->sysctl_pmtu_disc = 1;
tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
tbl[idx++].data = &ipvs->sysctl_backup_only;
+ ipvs->sysctl_conn_reuse_mode = 1;
+ tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 1425e9a924c4..ef0eb0a8d552 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -45,17 +45,19 @@
NOTES.
- * The stored value for average bps is scaled by 2^5, so that maximal
- rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
+ * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.
- * A lot code is taken from net/sched/estimator.c
+ * Netlink users can see 64-bit values but sockopt users are restricted
+ to 32-bit values for conns, packets, bps, cps and pps.
+
+ * A lot of code is taken from net/core/gen_estimator.c
*/
/*
* Make a summary from each cpu
*/
-static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
struct ip_vs_cpu_stats __percpu *stats)
{
int i;
@@ -64,27 +66,31 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
unsigned int start;
- __u64 inbytes, outbytes;
+ u64 conns, inpkts, outpkts, inbytes, outbytes;
+
if (add) {
- sum->conns += s->ustats.conns;
- sum->inpkts += s->ustats.inpkts;
- sum->outpkts += s->ustats.outpkts;
do {
start = u64_stats_fetch_begin(&s->syncp);
- inbytes = s->ustats.inbytes;
- outbytes = s->ustats.outbytes;
+ conns = s->cnt.conns;
+ inpkts = s->cnt.inpkts;
+ outpkts = s->cnt.outpkts;
+ inbytes = s->cnt.inbytes;
+ outbytes = s->cnt.outbytes;
} while (u64_stats_fetch_retry(&s->syncp, start));
+ sum->conns += conns;
+ sum->inpkts += inpkts;
+ sum->outpkts += outpkts;
sum->inbytes += inbytes;
sum->outbytes += outbytes;
} else {
add = true;
- sum->conns = s->ustats.conns;
- sum->inpkts = s->ustats.inpkts;
- sum->outpkts = s->ustats.outpkts;
do {
start = u64_stats_fetch_begin(&s->syncp);
- sum->inbytes = s->ustats.inbytes;
- sum->outbytes = s->ustats.outbytes;
+ sum->conns = s->cnt.conns;
+ sum->inpkts = s->cnt.inpkts;
+ sum->outpkts = s->cnt.outpkts;
+ sum->inbytes = s->cnt.inbytes;
+ sum->outbytes = s->cnt.outbytes;
} while (u64_stats_fetch_retry(&s->syncp, start));
}
}
@@ -95,10 +101,7 @@ static void estimation_timer(unsigned long arg)
{
struct ip_vs_estimator *e;
struct ip_vs_stats *s;
- u32 n_conns;
- u32 n_inpkts, n_outpkts;
- u64 n_inbytes, n_outbytes;
- u32 rate;
+ u64 rate;
struct net *net = (struct net *)arg;
struct netns_ipvs *ipvs;
@@ -108,33 +111,29 @@ static void estimation_timer(unsigned long arg)
s = container_of(e, struct ip_vs_stats, est);
spin_lock(&s->lock);
- ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
- n_conns = s->ustats.conns;
- n_inpkts = s->ustats.inpkts;
- n_outpkts = s->ustats.outpkts;
- n_inbytes = s->ustats.inbytes;
- n_outbytes = s->ustats.outbytes;
+ ip_vs_read_cpu_stats(&s->kstats, s->cpustats);
/* scaled by 2^10, but divided 2 seconds */
- rate = (n_conns - e->last_conns) << 9;
- e->last_conns = n_conns;
- e->cps += ((long)rate - (long)e->cps) >> 2;
-
- rate = (n_inpkts - e->last_inpkts) << 9;
- e->last_inpkts = n_inpkts;
- e->inpps += ((long)rate - (long)e->inpps) >> 2;
-
- rate = (n_outpkts - e->last_outpkts) << 9;
- e->last_outpkts = n_outpkts;
- e->outpps += ((long)rate - (long)e->outpps) >> 2;
-
- rate = (n_inbytes - e->last_inbytes) << 4;
- e->last_inbytes = n_inbytes;
- e->inbps += ((long)rate - (long)e->inbps) >> 2;
-
- rate = (n_outbytes - e->last_outbytes) << 4;
- e->last_outbytes = n_outbytes;
- e->outbps += ((long)rate - (long)e->outbps) >> 2;
+ rate = (s->kstats.conns - e->last_conns) << 9;
+ e->last_conns = s->kstats.conns;
+ e->cps += ((s64)rate - (s64)e->cps) >> 2;
+
+ rate = (s->kstats.inpkts - e->last_inpkts) << 9;
+ e->last_inpkts = s->kstats.inpkts;
+ e->inpps += ((s64)rate - (s64)e->inpps) >> 2;
+
+ rate = (s->kstats.outpkts - e->last_outpkts) << 9;
+ e->last_outpkts = s->kstats.outpkts;
+ e->outpps += ((s64)rate - (s64)e->outpps) >> 2;
+
+ /* scaled by 2^5, but divided 2 seconds */
+ rate = (s->kstats.inbytes - e->last_inbytes) << 4;
+ e->last_inbytes = s->kstats.inbytes;
+ e->inbps += ((s64)rate - (s64)e->inbps) >> 2;
+
+ rate = (s->kstats.outbytes - e->last_outbytes) << 4;
+ e->last_outbytes = s->kstats.outbytes;
+ e->outbps += ((s64)rate - (s64)e->outbps) >> 2;
spin_unlock(&s->lock);
}
spin_unlock(&ipvs->est_lock);
@@ -166,14 +165,14 @@ void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
void ip_vs_zero_estimator(struct ip_vs_stats *stats)
{
struct ip_vs_estimator *est = &stats->est;
- struct ip_vs_stats_user *u = &stats->ustats;
+ struct ip_vs_kstats *k = &stats->kstats;
/* reset counters, caller must hold the stats->lock lock */
- est->last_inbytes = u->inbytes;
- est->last_outbytes = u->outbytes;
- est->last_conns = u->conns;
- est->last_inpkts = u->inpkts;
- est->last_outpkts = u->outpkts;
+ est->last_inbytes = k->inbytes;
+ est->last_outbytes = k->outbytes;
+ est->last_conns = k->conns;
+ est->last_inpkts = k->inpkts;
+ est->last_outpkts = k->outpkts;
est->cps = 0;
est->inpps = 0;
est->outpps = 0;
@@ -182,8 +181,7 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
}
/* Get decoded rates */
-void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
- struct ip_vs_stats *stats)
+void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
{
struct ip_vs_estimator *e = &stats->est;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d93ceeb3ef04..19b9cce6c210 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -845,10 +845,27 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
struct ip_vs_conn *cp;
struct netns_ipvs *ipvs = net_ipvs(net);
- if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
cp = ip_vs_conn_in_get(param);
- else
+ if (cp && ((cp->dport != dport) ||
+ !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
+ if (!(flags & IP_VS_CONN_F_INACTIVE)) {
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ cp = NULL;
+ } else {
+ /* This is the expiration message for the
+ * connection that was already replaced, so we
+ * just ignore it.
+ */
+ __ip_vs_conn_put(cp);
+ kfree(param->pe_data);
+ return;
+ }
+ }
+ } else {
cp = ip_vs_ct_in_get(param);
+ }
if (cp) {
/* Free pe_data */
@@ -1388,9 +1405,11 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
mreq.imr_ifindex = dev->ifindex;
+ rtnl_lock();
lock_sock(sk);
ret = ip_mc_join_group(sk, &mreq);
release_sock(sk);
+ rtnl_unlock();
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3aedbda7658a..19986ec5f21a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
struct sock *sk = skb->sk;
struct rtable *ort = skb_rtable(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+ if (!skb->dev && sk && sk_fullsock(sk))
ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
}
@@ -536,8 +536,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_update_conntrack(skb, cp, 1);
if (!local) {
skb_forward_csum(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
- dst_output);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output_sk);
} else
ret = NF_ACCEPT;
return ret;
@@ -554,8 +554,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_notrack(skb);
if (!local) {
skb_forward_csum(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
- dst_output);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output_sk);
} else
ret = NF_ACCEPT;
return ret;
@@ -924,7 +924,8 @@ int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+ struct net *net = skb_net(skb);
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
@@ -991,7 +992,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
iph->ttl = ttl;
- ip_select_ident(skb, NULL);
+ ip_select_ident(net, skb, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;