aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter/ipvs
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2012-07-04 13:13:55 -0700
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2012-07-04 13:13:55 -0700
commit404c3bc30cb1361e1b3533643326ab472d24a618 (patch)
tree156cc9032c8aee17167d926c5bdae009ba8f36d2 /net/netfilter/ipvs
parentInput: wacom - TPC2FG doesn't store touch id for slots (diff)
parentLinux 3.5-rc5 (diff)
downloadlinux-dev-404c3bc30cb1361e1b3533643326ab472d24a618.tar.xz
linux-dev-404c3bc30cb1361e1b3533643326ab472d24a618.zip
Merge commit 'v3.5-rc5' into next
Diffstat (limited to 'net/netfilter/ipvs')
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c104
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c43
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c270
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c14
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c14
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c56
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c668
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c2
15 files changed, 785 insertions, 417 deletions
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 52856178c9d7..64f9e8f13207 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -313,7 +313,7 @@ vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
* Assumes already checked proto==IPPROTO_TCP and diff!=0.
*/
static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
- unsigned flag, __u32 seq, int diff)
+ unsigned int flag, __u32 seq, int diff)
{
/* spinlock is to keep updating cp->flags atomic */
spin_lock(&cp->lock);
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 29fa5badde75..1548df9a7524 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -86,42 +86,42 @@ struct ip_vs_aligned_lock
static struct ip_vs_aligned_lock
__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
-static inline void ct_read_lock(unsigned key)
+static inline void ct_read_lock(unsigned int key)
{
read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
-static inline void ct_read_unlock(unsigned key)
+static inline void ct_read_unlock(unsigned int key)
{
read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
-static inline void ct_write_lock(unsigned key)
+static inline void ct_write_lock(unsigned int key)
{
write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
-static inline void ct_write_unlock(unsigned key)
+static inline void ct_write_unlock(unsigned int key)
{
write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
-static inline void ct_read_lock_bh(unsigned key)
+static inline void ct_read_lock_bh(unsigned int key)
{
read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
-static inline void ct_read_unlock_bh(unsigned key)
+static inline void ct_read_unlock_bh(unsigned int key)
{
read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
-static inline void ct_write_lock_bh(unsigned key)
+static inline void ct_write_lock_bh(unsigned int key)
{
write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
-static inline void ct_write_unlock_bh(unsigned key)
+static inline void ct_write_unlock_bh(unsigned int key)
{
write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
@@ -130,7 +130,7 @@ static inline void ct_write_unlock_bh(unsigned key)
/*
* Returns hash value for IPVS connection entry
*/
-static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
+static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int proto,
const union nf_inet_addr *addr,
__be16 port)
{
@@ -188,7 +188,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
*/
static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
{
- unsigned hash;
+ unsigned int hash;
int ret;
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
@@ -224,7 +224,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
*/
static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
{
- unsigned hash;
+ unsigned int hash;
int ret;
/* unhash it and decrease its reference counter */
@@ -257,7 +257,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
static inline struct ip_vs_conn *
__ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
{
- unsigned hash;
+ unsigned int hash;
struct ip_vs_conn *cp;
struct hlist_node *n;
@@ -344,7 +344,7 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto);
/* Get reference to connection template */
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
{
- unsigned hash;
+ unsigned int hash;
struct ip_vs_conn *cp;
struct hlist_node *n;
@@ -394,7 +394,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
* p->vaddr, p->vport: pkt dest address (foreign host) */
struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
{
- unsigned hash;
+ unsigned int hash;
struct ip_vs_conn *cp, *ret=NULL;
struct hlist_node *n;
@@ -548,6 +548,7 @@ static inline void
ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
{
unsigned int conn_flags;
+ __u32 flags;
/* if dest is NULL, then return directly */
if (!dest)
@@ -559,17 +560,19 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
conn_flags = atomic_read(&dest->conn_flags);
if (cp->protocol != IPPROTO_UDP)
conn_flags &= ~IP_VS_CONN_F_ONE_PACKET;
+ flags = cp->flags;
/* Bind with the destination and its corresponding transmitter */
- if (cp->flags & IP_VS_CONN_F_SYNC) {
+ if (flags & IP_VS_CONN_F_SYNC) {
/* if the connection is not template and is created
* by sync, preserve the activity flag.
*/
- if (!(cp->flags & IP_VS_CONN_F_TEMPLATE))
+ if (!(flags & IP_VS_CONN_F_TEMPLATE))
conn_flags &= ~IP_VS_CONN_F_INACTIVE;
/* connections inherit forwarding method from dest */
- cp->flags &= ~IP_VS_CONN_F_FWD_MASK;
+ flags &= ~(IP_VS_CONN_F_FWD_MASK | IP_VS_CONN_F_NOOUTPUT);
}
- cp->flags |= conn_flags;
+ flags |= conn_flags;
+ cp->flags = flags;
cp->dest = dest;
IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
@@ -584,12 +587,12 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
atomic_read(&dest->refcnt));
/* Update the connection counters */
- if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
- /* It is a normal connection, so increase the inactive
- connection counter because it is in TCP SYNRECV
- state (inactive) or other protocol inacive state */
- if ((cp->flags & IP_VS_CONN_F_SYNC) &&
- (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+ /* It is a normal connection, so modify the counters
+ * according to the flags, later the protocol can
+ * update them on state change
+ */
+ if (!(flags & IP_VS_CONN_F_INACTIVE))
atomic_inc(&dest->activeconns);
else
atomic_inc(&dest->inactconns);
@@ -613,14 +616,40 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
{
struct ip_vs_dest *dest;
- if ((cp) && (!cp->dest)) {
- dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
- cp->dport, &cp->vaddr, cp->vport,
- cp->protocol, cp->fwmark, cp->flags);
+ dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+ cp->dport, &cp->vaddr, cp->vport,
+ cp->protocol, cp->fwmark, cp->flags);
+ if (dest) {
+ struct ip_vs_proto_data *pd;
+
+ spin_lock(&cp->lock);
+ if (cp->dest) {
+ spin_unlock(&cp->lock);
+ return dest;
+ }
+
+ /* Applications work depending on the forwarding method
+ * but better to reassign them always when binding dest */
+ if (cp->app)
+ ip_vs_unbind_app(cp);
+
ip_vs_bind_dest(cp, dest);
- return dest;
- } else
- return NULL;
+ spin_unlock(&cp->lock);
+
+ /* Update its packet transmitter */
+ cp->packet_xmit = NULL;
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ ip_vs_bind_xmit_v6(cp);
+ else
+#endif
+ ip_vs_bind_xmit(cp);
+
+ pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol);
+ if (pd && atomic_read(&pd->appcnt))
+ ip_vs_bind_app(cp, pd->pp);
+ }
+ return dest;
}
@@ -743,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
static void ip_vs_conn_expire(unsigned long data)
{
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+ struct net *net = ip_vs_conn_net(cp);
+ struct netns_ipvs *ipvs = net_ipvs(net);
cp->timeout = 60*HZ;
@@ -808,6 +838,9 @@ static void ip_vs_conn_expire(unsigned long data)
atomic_read(&cp->refcnt)-1,
atomic_read(&cp->n_control));
+ if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
+
ip_vs_conn_put(cp);
}
@@ -824,7 +857,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
*/
struct ip_vs_conn *
ip_vs_conn_new(const struct ip_vs_conn_param *p,
- const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
+ const union nf_inet_addr *daddr, __be16 dport, unsigned int flags,
struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
@@ -881,6 +914,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
/* Set its state and timeout */
cp->state = 0;
cp->timeout = 3*HZ;
+ cp->sync_endtime = jiffies & ~3UL;
/* Bind its packet transmitter */
#ifdef CONFIG_IP_VS_IPV6
@@ -1057,7 +1091,7 @@ static const struct file_operations ip_vs_conn_fops = {
.release = seq_release_net,
};
-static const char *ip_vs_origin_name(unsigned flags)
+static const char *ip_vs_origin_name(unsigned int flags)
{
if (flags & IP_VS_CONN_F_SYNC)
return "SYNC";
@@ -1169,7 +1203,7 @@ void ip_vs_random_dropentry(struct net *net)
* Randomly scan 1/32 of the whole table every second
*/
for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
- unsigned hash = net_random() & ip_vs_conn_tab_mask;
+ unsigned int hash = net_random() & ip_vs_conn_tab_mask;
struct hlist_node *n;
/*
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 2555816e7788..a54b018c6eea 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -80,7 +80,7 @@ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
#define icmp_id(icmph) (((icmph)->un).echo.id)
#define icmpv6_id(icmph) (icmph->icmp6_dataun.u_echo.identifier)
-const char *ip_vs_proto_name(unsigned proto)
+const char *ip_vs_proto_name(unsigned int proto)
{
static char buf[20];
@@ -1613,34 +1613,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
else
pkts = atomic_add_return(1, &cp->in_pkts);
- if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
- cp->protocol == IPPROTO_SCTP) {
- if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
- (pkts % sysctl_sync_period(ipvs)
- == sysctl_sync_threshold(ipvs))) ||
- (cp->old_state != cp->state &&
- ((cp->state == IP_VS_SCTP_S_CLOSED) ||
- (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
- (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
- ip_vs_sync_conn(net, cp);
- goto out;
- }
- }
-
- /* Keep this block last: TCP and others with pp->num_states <= 1 */
- else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
- (((cp->protocol != IPPROTO_TCP ||
- cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- (pkts % sysctl_sync_period(ipvs)
- == sysctl_sync_threshold(ipvs))) ||
- ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
- ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
- (cp->state == IP_VS_TCP_S_CLOSE) ||
- (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
- (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
- ip_vs_sync_conn(net, cp);
-out:
- cp->old_state = cp->state;
+ if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ ip_vs_sync_conn(net, cp, pkts);
ip_vs_conn_put(cp);
return ret;
@@ -1924,6 +1898,7 @@ protocol_fail:
control_fail:
ip_vs_estimator_net_cleanup(net);
estimator_fail:
+ net->ipvs = NULL;
return -ENOMEM;
}
@@ -1936,6 +1911,7 @@ static void __net_exit __ip_vs_cleanup(struct net *net)
ip_vs_control_net_cleanup(net);
ip_vs_estimator_net_cleanup(net);
IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
+ net->ipvs = NULL;
}
static void __net_exit __ip_vs_dev_cleanup(struct net *net)
@@ -1993,10 +1969,18 @@ static int __init ip_vs_init(void)
goto cleanup_dev;
}
+ ret = ip_vs_register_nl_ioctl();
+ if (ret < 0) {
+ pr_err("can't register netlink/ioctl.\n");
+ goto cleanup_hooks;
+ }
+
pr_info("ipvs loaded.\n");
return ret;
+cleanup_hooks:
+ nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
cleanup_dev:
unregister_pernet_device(&ipvs_core_dev_ops);
cleanup_sub:
@@ -2012,6 +1996,7 @@ exit:
static void __exit ip_vs_cleanup(void)
{
+ ip_vs_unregister_nl_ioctl();
nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
unregister_pernet_device(&ipvs_core_dev_ops);
unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index b3afe189af61..dd811b8dd97c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -265,11 +265,11 @@ static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
/*
* Returns hash value for virtual service
*/
-static inline unsigned
-ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
+static inline unsigned int
+ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
const union nf_inet_addr *addr, __be16 port)
{
- register unsigned porth = ntohs(port);
+ register unsigned int porth = ntohs(port);
__be32 addr_fold = addr->ip;
#ifdef CONFIG_IP_VS_IPV6
@@ -286,7 +286,7 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
/*
* Returns hash value of fwmark for virtual service lookup
*/
-static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
+static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
{
return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
}
@@ -298,7 +298,7 @@ static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
*/
static int ip_vs_svc_hash(struct ip_vs_service *svc)
{
- unsigned hash;
+ unsigned int hash;
if (svc->flags & IP_VS_SVC_F_HASHED) {
pr_err("%s(): request for already hashed, called from %pF\n",
@@ -361,7 +361,7 @@ static inline struct ip_vs_service *
__ip_vs_service_find(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
- unsigned hash;
+ unsigned int hash;
struct ip_vs_service *svc;
/* Check for "full" addressed entries */
@@ -388,7 +388,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,
static inline struct ip_vs_service *
__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
{
- unsigned hash;
+ unsigned int hash;
struct ip_vs_service *svc;
/* Check for fwmark addressed entries */
@@ -489,11 +489,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
/*
* Returns hash value for real service
*/
-static inline unsigned ip_vs_rs_hashkey(int af,
+static inline unsigned int ip_vs_rs_hashkey(int af,
const union nf_inet_addr *addr,
__be16 port)
{
- register unsigned porth = ntohs(port);
+ register unsigned int porth = ntohs(port);
__be32 addr_fold = addr->ip;
#ifdef CONFIG_IP_VS_IPV6
@@ -512,7 +512,7 @@ static inline unsigned ip_vs_rs_hashkey(int af,
*/
static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
- unsigned hash;
+ unsigned int hash;
if (!list_empty(&dest->d_list)) {
return 0;
@@ -555,7 +555,7 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
__be16 dport)
{
struct netns_ipvs *ipvs = net_ipvs(net);
- unsigned hash;
+ unsigned int hash;
struct ip_vs_dest *dest;
/*
@@ -842,7 +842,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
struct ip_vs_dest **dest_p)
{
struct ip_vs_dest *dest;
- unsigned atype;
+ unsigned int atype;
EnterFunction(2);
@@ -1599,6 +1599,10 @@ static int ip_vs_zero_all(struct net *net)
}
#ifdef CONFIG_SYSCTL
+
+static int zero;
+static int three = 3;
+
static int
proc_do_defense_mode(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1632,7 +1636,8 @@ proc_do_sync_threshold(ctl_table *table, int write,
memcpy(val, valp, sizeof(val));
rc = proc_dointvec(table, write, buffer, lenp, ppos);
- if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
+ if (write && (valp[0] < 0 || valp[1] < 0 ||
+ (valp[0] >= valp[1] && valp[1]))) {
/* Restore the correct value */
memcpy(valp, val, sizeof(val));
}
@@ -1652,9 +1657,24 @@ proc_do_sync_mode(ctl_table *table, int write,
if ((*valp < 0) || (*valp > 1)) {
/* Restore the correct value */
*valp = val;
- } else {
- struct net *net = current->nsproxy->net_ns;
- ip_vs_sync_switch_mode(net, val);
+ }
+ }
+ return rc;
+}
+
+static int
+proc_do_sync_ports(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = table->data;
+ int val = *valp;
+ int rc;
+
+ rc = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (write && (*valp != val)) {
+ if (*valp < 1 || !is_power_of_2(*valp)) {
+ /* Restore the correct value */
+ *valp = val;
}
}
return rc;
@@ -1718,6 +1738,24 @@ static struct ctl_table vs_vars[] = {
.proc_handler = &proc_do_sync_mode,
},
{
+ .procname = "sync_ports",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_do_sync_ports,
+ },
+ {
+ .procname = "sync_qlen_max",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sync_sock_size",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "cache_bypass",
.maxlen = sizeof(int),
.mode = 0644,
@@ -1743,6 +1781,20 @@ static struct ctl_table vs_vars[] = {
.proc_handler = proc_do_sync_threshold,
},
{
+ .procname = "sync_refresh_period",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "sync_retries",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &three,
+ },
+ {
.procname = "nat_icmp_send",
.maxlen = sizeof(int),
.mode = 0644,
@@ -1846,13 +1898,6 @@ static struct ctl_table vs_vars[] = {
{ }
};
-const struct ctl_path net_vs_ctl_path[] = {
- { .procname = "net", },
- { .procname = "ipv4", },
- { .procname = "vs", },
- { }
-};
-EXPORT_SYMBOL_GPL(net_vs_ctl_path);
#endif
#ifdef CONFIG_PROC_FS
@@ -1867,7 +1912,7 @@ struct ip_vs_iter {
* Write the contents of the VS rule table to a PROCfs file.
* (It is kept just for backward compatibility)
*/
-static inline const char *ip_vs_fwd_name(unsigned flags)
+static inline const char *ip_vs_fwd_name(unsigned int flags)
{
switch (flags & IP_VS_CONN_F_FWD_MASK) {
case IP_VS_CONN_F_LOCALNODE:
@@ -2816,17 +2861,17 @@ static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
ip_vs_copy_stats(&ustats, stats);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
- NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
- NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
-
+ if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
+ goto nla_put_failure;
nla_nest_end(skb, nl_stats);
return 0;
@@ -2847,23 +2892,25 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
if (!nl_service)
return -EMSGSIZE;
- NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
-
+ if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
+ goto nla_put_failure;
if (svc->fwmark) {
- NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
+ if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
+ goto nla_put_failure;
} else {
- NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
- NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
- NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
+ if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
+ nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
+ nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
+ goto nla_put_failure;
}
- NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
- if (svc->pe)
- NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
- NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
- NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
- NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
-
+ if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
+ (svc->pe &&
+ nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
+ nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
+ nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
+ nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
+ goto nla_put_failure;
if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
goto nla_put_failure;
@@ -3038,21 +3085,22 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
if (!nl_dest)
return -EMSGSIZE;
- NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
- NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
-
- NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
- atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
- NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
- NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
- NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
- NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
- atomic_read(&dest->activeconns));
- NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
- atomic_read(&dest->inactconns));
- NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
- atomic_read(&dest->persistconns));
-
+ if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
+ nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+ (atomic_read(&dest->conn_flags) &
+ IP_VS_CONN_F_FWD_MASK)) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
+ atomic_read(&dest->weight)) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+ atomic_read(&dest->activeconns)) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+ atomic_read(&dest->inactconns)) ||
+ nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+ atomic_read(&dest->persistconns)))
+ goto nla_put_failure;
if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
goto nla_put_failure;
@@ -3181,10 +3229,10 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
if (!nl_daemon)
return -EMSGSIZE;
- NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
- NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
- NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
-
+ if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
+ nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
+ nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
+ goto nla_put_failure;
nla_nest_end(skb, nl_daemon);
return 0;
@@ -3473,21 +3521,26 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
__ip_vs_get_timeouts(net, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
- NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
- NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
- t.tcp_fin_timeout);
+ if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
+ t.tcp_timeout) ||
+ nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+ t.tcp_fin_timeout))
+ goto nla_put_failure;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
- NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
+ if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
+ goto nla_put_failure;
#endif
break;
}
case IPVS_CMD_GET_INFO:
- NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
- NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
- ip_vs_conn_tab_size);
+ if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
+ IP_VS_VERSION_CODE) ||
+ nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+ ip_vs_conn_tab_size))
+ goto nla_put_failure;
break;
}
@@ -3654,6 +3707,12 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
tbl[idx++].data = &ipvs->sysctl_snat_reroute;
ipvs->sysctl_sync_ver = 1;
tbl[idx++].data = &ipvs->sysctl_sync_ver;
+ ipvs->sysctl_sync_ports = 1;
+ tbl[idx++].data = &ipvs->sysctl_sync_ports;
+ ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
+ tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
+ ipvs->sysctl_sync_sock_size = 0;
+ tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
tbl[idx++].data = &ipvs->sysctl_cache_bypass;
tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
@@ -3661,11 +3720,14 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+ ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
+ tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
+ ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
+ tbl[idx++].data = &ipvs->sysctl_sync_retries;
tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
- ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
- tbl);
+ ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) {
if (!net_eq(net, &init_net))
kfree(tbl);
@@ -3680,7 +3742,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
return 0;
}
-void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net)
+void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3692,7 +3754,7 @@ void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net)
#else
int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
-void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { }
+void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
#endif
@@ -3750,21 +3812,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
free_percpu(ipvs->tot_stats.cpustats);
}
-int __init ip_vs_control_init(void)
+int __init ip_vs_register_nl_ioctl(void)
{
- int idx;
int ret;
- EnterFunction(2);
-
- /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
- for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
- INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
- }
-
- smp_wmb(); /* Do we really need it now ? */
-
ret = nf_register_sockopt(&ip_vs_sockopts);
if (ret) {
pr_err("cannot register sockopt.\n");
@@ -3776,28 +3827,47 @@ int __init ip_vs_control_init(void)
pr_err("cannot register Generic Netlink interface.\n");
goto err_genl;
}
-
- ret = register_netdevice_notifier(&ip_vs_dst_notifier);
- if (ret < 0)
- goto err_notf;
-
- LeaveFunction(2);
return 0;
-err_notf:
- ip_vs_genl_unregister();
err_genl:
nf_unregister_sockopt(&ip_vs_sockopts);
err_sock:
return ret;
}
+void ip_vs_unregister_nl_ioctl(void)
+{
+ ip_vs_genl_unregister();
+ nf_unregister_sockopt(&ip_vs_sockopts);
+}
+
+int __init ip_vs_control_init(void)
+{
+ int idx;
+ int ret;
+
+ EnterFunction(2);
+
+ /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
+ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
+ INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
+ }
+
+ smp_wmb(); /* Do we really need it now ? */
+
+ ret = register_netdevice_notifier(&ip_vs_dst_notifier);
+ if (ret < 0)
+ return ret;
+
+ LeaveFunction(2);
+ return 0;
+}
+
void ip_vs_control_cleanup(void)
{
EnterFunction(2);
unregister_netdevice_notifier(&ip_vs_dst_notifier);
- ip_vs_genl_unregister();
- nf_unregister_sockopt(&ip_vs_sockopts);
LeaveFunction(2);
}
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 1c269e56200a..8b7dca9ea422 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -68,7 +68,7 @@ struct ip_vs_dh_bucket {
/*
* Returns hash value for IPVS DH entry
*/
-static inline unsigned ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr)
+static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr)
{
__be32 addr_fold = addr->ip;
@@ -149,7 +149,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
/* allocate the DH table for this service */
tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
- GFP_ATOMIC);
+ GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 538d74ee4f68..b20b29c903ef 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -177,7 +177,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
__be16 port;
struct ip_vs_conn *n_cp;
char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */
- unsigned buf_len;
+ unsigned int buf_len;
int ret = 0;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
@@ -439,6 +439,8 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
struct ip_vs_app *app;
struct netns_ipvs *ipvs = net_ipvs(net);
+ if (!ipvs)
+ return -ENOENT;
app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL);
if (!app)
return -ENOMEM;
@@ -483,7 +485,7 @@ static struct pernet_operations ip_vs_ftp_ops = {
.exit = __ip_vs_ftp_exit,
};
-int __init ip_vs_ftp_init(void)
+static int __init ip_vs_ftp_init(void)
{
int rv;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 0f16283fd058..df646ccf08a7 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -142,7 +142,7 @@ static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
/*
* Returns hash value for IPVS LBLC entry
*/
-static inline unsigned
+static inline unsigned int
ip_vs_lblc_hashkey(int af, const union nf_inet_addr *addr)
{
__be32 addr_fold = addr->ip;
@@ -163,7 +163,7 @@ ip_vs_lblc_hashkey(int af, const union nf_inet_addr *addr)
static void
ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
{
- unsigned hash = ip_vs_lblc_hashkey(en->af, &en->addr);
+ unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr);
list_add(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
@@ -178,7 +178,7 @@ static inline struct ip_vs_lblc_entry *
ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
const union nf_inet_addr *addr)
{
- unsigned hash = ip_vs_lblc_hashkey(af, addr);
+ unsigned int hash = ip_vs_lblc_hashkey(af, addr);
struct ip_vs_lblc_entry *en;
list_for_each_entry(en, &tbl->bucket[hash], list)
@@ -342,7 +342,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
/*
* Allocate the ip_vs_lblc_table for this service
*/
- tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+ tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
@@ -551,6 +551,9 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ if (!ipvs)
+ return -ENOENT;
+
if (!net_eq(net, &init_net)) {
ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
sizeof(vs_vars_table),
@@ -563,8 +566,7 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
ipvs->lblc_ctl_header =
- register_net_sysctl_table(net, net_vs_ctl_path,
- ipvs->lblc_ctl_table);
+ register_net_sysctl(net, "net/ipv4/vs", ipvs->lblc_ctl_table);
if (!ipvs->lblc_ctl_header) {
if (!net_eq(net, &init_net))
kfree(ipvs->lblc_ctl_table);
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index eec797f8cce7..570e31ea427a 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -311,7 +311,7 @@ static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
/*
* Returns hash value for IPVS LBLCR entry
*/
-static inline unsigned
+static inline unsigned int
ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
{
__be32 addr_fold = addr->ip;
@@ -332,7 +332,7 @@ ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
static void
ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
{
- unsigned hash = ip_vs_lblcr_hashkey(en->af, &en->addr);
+ unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr);
list_add(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
@@ -347,7 +347,7 @@ static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
const union nf_inet_addr *addr)
{
- unsigned hash = ip_vs_lblcr_hashkey(af, addr);
+ unsigned int hash = ip_vs_lblcr_hashkey(af, addr);
struct ip_vs_lblcr_entry *en;
list_for_each_entry(en, &tbl->bucket[hash], list)
@@ -511,7 +511,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
/*
* Allocate the ip_vs_lblcr_table for this service
*/
- tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+ tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
@@ -745,6 +745,9 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ if (!ipvs)
+ return -ENOENT;
+
if (!net_eq(net, &init_net)) {
ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
sizeof(vs_vars_table),
@@ -757,8 +760,7 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
ipvs->lblcr_ctl_header =
- register_net_sysctl_table(net, net_vs_ctl_path,
- ipvs->lblcr_ctl_table);
+ register_net_sysctl(net, "net/ipv4/vs", ipvs->lblcr_ctl_table);
if (!ipvs->lblcr_ctl_header) {
if (!net_eq(net, &init_net))
kfree(ipvs->lblcr_ctl_table);
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index f843a8833250..50d82186da87 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -48,7 +48,7 @@ static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
*/
static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
{
- unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+ unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
pp->next = ip_vs_proto_table[hash];
ip_vs_proto_table[hash] = pp;
@@ -59,9 +59,6 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
return 0;
}
-#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) || \
- defined(CONFIG_IP_VS_PROTO_SCTP) || defined(CONFIG_IP_VS_PROTO_AH) || \
- defined(CONFIG_IP_VS_PROTO_ESP)
/*
* register an ipvs protocols netns related data
*/
@@ -69,9 +66,9 @@ static int
register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
{
struct netns_ipvs *ipvs = net_ipvs(net);
- unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+ unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
struct ip_vs_proto_data *pd =
- kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
+ kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);
if (!pd)
return -ENOMEM;
@@ -81,12 +78,18 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
ipvs->proto_data_table[hash] = pd;
atomic_set(&pd->appcnt, 0); /* Init app counter */
- if (pp->init_netns != NULL)
- pp->init_netns(net, pd);
+ if (pp->init_netns != NULL) {
+ int ret = pp->init_netns(net, pd);
+ if (ret) {
+ /* unlink an free proto data */
+ ipvs->proto_data_table[hash] = pd->next;
+ kfree(pd);
+ return ret;
+ }
+ }
return 0;
}
-#endif
/*
* unregister an ipvs protocol
@@ -94,7 +97,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
{
struct ip_vs_protocol **pp_p;
- unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+ unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
pp_p = &ip_vs_proto_table[hash];
for (; *pp_p; pp_p = &(*pp_p)->next) {
@@ -117,7 +120,7 @@ unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data **pd_p;
- unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
+ unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol);
pd_p = &ipvs->proto_data_table[hash];
for (; *pd_p; pd_p = &(*pd_p)->next) {
@@ -139,7 +142,7 @@ unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
{
struct ip_vs_protocol *pp;
- unsigned hash = IP_VS_PROTO_HASH(proto);
+ unsigned int hash = IP_VS_PROTO_HASH(proto);
for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) {
if (pp->protocol == proto)
@@ -153,11 +156,11 @@ EXPORT_SYMBOL(ip_vs_proto_get);
/*
* get ip_vs_protocol object data by netns and proto
*/
-struct ip_vs_proto_data *
+static struct ip_vs_proto_data *
__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
{
struct ip_vs_proto_data *pd;
- unsigned hash = IP_VS_PROTO_HASH(proto);
+ unsigned int hash = IP_VS_PROTO_HASH(proto);
for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
if (pd->pp->protocol == proto)
@@ -196,7 +199,7 @@ void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
int *
ip_vs_create_timeout_table(int *table, int size)
{
- return kmemdup(table, size, GFP_ATOMIC);
+ return kmemdup(table, size, GFP_KERNEL);
}
@@ -316,22 +319,35 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
*/
int __net_init ip_vs_protocol_net_init(struct net *net)
{
+ int i, ret;
+ static struct ip_vs_protocol *protos[] = {
#ifdef CONFIG_IP_VS_PROTO_TCP
- register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
+ &ip_vs_protocol_tcp,
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
- register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
+ &ip_vs_protocol_udp,
#endif
#ifdef CONFIG_IP_VS_PROTO_SCTP
- register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
+ &ip_vs_protocol_sctp,
#endif
#ifdef CONFIG_IP_VS_PROTO_AH
- register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
+ &ip_vs_protocol_ah,
#endif
#ifdef CONFIG_IP_VS_PROTO_ESP
- register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
+ &ip_vs_protocol_esp,
#endif
+ };
+
+ for (i = 0; i < ARRAY_SIZE(protos); i++) {
+ ret = register_ip_vs_proto_netns(net, protos[i]);
+ if (ret < 0)
+ goto cleanup;
+ }
return 0;
+
+cleanup:
+ ip_vs_protocol_net_cleanup(net);
+ return ret;
}
void __net_exit ip_vs_protocol_net_cleanup(struct net *net)
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1fbf7a2816f5..9f3fb751c491 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1090,7 +1090,7 @@ out:
* timeouts is netns related now.
* ---------------------------------------------
*/
-static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -1098,6 +1098,9 @@ static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
spin_lock_init(&ipvs->sctp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
sizeof(sctp_timeouts));
+ if (!pd->timeout_table)
+ return -ENOMEM;
+ return 0;
}
static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index ef8641f7af83..cd609cc62721 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -677,7 +677,7 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
* timeouts is netns related now.
* ---------------------------------------------
*/
-static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -685,7 +685,10 @@ static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
spin_lock_init(&ipvs->tcp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
sizeof(tcp_timeouts));
+ if (!pd->timeout_table)
+ return -ENOMEM;
pd->tcp_state_table = tcp_states;
+ return 0;
}
static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index f4b7262896bb..2fedb2dcb3d1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -467,7 +467,7 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
}
-static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -475,6 +475,9 @@ static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
spin_lock_init(&ipvs->udp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
sizeof(udp_timeouts));
+ if (!pd->timeout_table)
+ return -ENOMEM;
+ return 0;
}
static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 069e8d4d5c01..05126521743e 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -70,7 +70,7 @@ struct ip_vs_sh_bucket {
/*
* Returns hash value for IPVS SH entry
*/
-static inline unsigned ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr)
+static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr)
{
__be32 addr_fold = addr->ip;
@@ -162,7 +162,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
/* allocate the SH table for this service */
tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
- GFP_ATOMIC);
+ GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 8a0d6d6889f0..effa10c9e4e3 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -196,6 +196,7 @@ struct ip_vs_sync_thread_data {
struct net *net;
struct socket *sock;
char *buf;
+ int id;
};
/* Version 0 definition of packet sizes */
@@ -271,13 +272,6 @@ struct ip_vs_sync_buff {
unsigned char *end;
};
-/* multicast addr */
-static struct sockaddr_in mcast_addr = {
- .sin_family = AF_INET,
- .sin_port = cpu_to_be16(IP_VS_SYNC_PORT),
- .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
-};
-
/*
* Copy of struct ip_vs_seq
* From unaligned network order to aligned host order
@@ -300,18 +294,22 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
put_unaligned_be32(ho->previous_delta, &no->previous_delta);
}
-static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
+static inline struct ip_vs_sync_buff *
+sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
{
struct ip_vs_sync_buff *sb;
spin_lock_bh(&ipvs->sync_lock);
- if (list_empty(&ipvs->sync_queue)) {
+ if (list_empty(&ms->sync_queue)) {
sb = NULL;
+ __set_current_state(TASK_INTERRUPTIBLE);
} else {
- sb = list_entry(ipvs->sync_queue.next,
- struct ip_vs_sync_buff,
+ sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
list);
list_del(&sb->list);
+ ms->sync_queue_len--;
+ if (!ms->sync_queue_len)
+ ms->sync_queue_delay = 0;
}
spin_unlock_bh(&ipvs->sync_lock);
@@ -334,7 +332,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
kfree(sb);
return NULL;
}
- sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */
+ sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */
sb->mesg->version = SYNC_PROTO_VER;
sb->mesg->syncid = ipvs->master_syncid;
sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
@@ -353,14 +351,22 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
kfree(sb);
}
-static inline void sb_queue_tail(struct netns_ipvs *ipvs)
+static inline void sb_queue_tail(struct netns_ipvs *ipvs,
+ struct ipvs_master_sync_state *ms)
{
- struct ip_vs_sync_buff *sb = ipvs->sync_buff;
+ struct ip_vs_sync_buff *sb = ms->sync_buff;
spin_lock(&ipvs->sync_lock);
- if (ipvs->sync_state & IP_VS_STATE_MASTER)
- list_add_tail(&sb->list, &ipvs->sync_queue);
- else
+ if (ipvs->sync_state & IP_VS_STATE_MASTER &&
+ ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
+ if (!ms->sync_queue_len)
+ schedule_delayed_work(&ms->master_wakeup_work,
+ max(IPVS_SYNC_SEND_DELAY, 1));
+ ms->sync_queue_len++;
+ list_add_tail(&sb->list, &ms->sync_queue);
+ if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
+ wake_up_process(ms->master_thread);
+ } else
ip_vs_sync_buff_release(sb);
spin_unlock(&ipvs->sync_lock);
}
@@ -370,49 +376,26 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs)
* than the specified time or the specified time is zero.
*/
static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
+get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
+ unsigned long time)
{
struct ip_vs_sync_buff *sb;
spin_lock_bh(&ipvs->sync_buff_lock);
- if (ipvs->sync_buff &&
- time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) {
- sb = ipvs->sync_buff;
- ipvs->sync_buff = NULL;
+ sb = ms->sync_buff;
+ if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
+ ms->sync_buff = NULL;
+ __set_current_state(TASK_RUNNING);
} else
sb = NULL;
spin_unlock_bh(&ipvs->sync_buff_lock);
return sb;
}
-/*
- * Switch mode from sending version 0 or 1
- * - must handle sync_buf
- */
-void ip_vs_sync_switch_mode(struct net *net, int mode)
+static inline int
+select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
- if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
- return;
- if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
- return;
-
- spin_lock_bh(&ipvs->sync_buff_lock);
- /* Buffer empty ? then let buf_create do the job */
- if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
- kfree(ipvs->sync_buff);
- ipvs->sync_buff = NULL;
- } else {
- spin_lock_bh(&ipvs->sync_lock);
- if (ipvs->sync_state & IP_VS_STATE_MASTER)
- list_add_tail(&ipvs->sync_buff->list,
- &ipvs->sync_queue);
- else
- ip_vs_sync_buff_release(ipvs->sync_buff);
- spin_unlock_bh(&ipvs->sync_lock);
- }
- spin_unlock_bh(&ipvs->sync_buff_lock);
+ return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
}
/*
@@ -442,15 +425,101 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
return sb;
}
+/* Check if conn should be synced.
+ * pkts: conn packets, use sysctl_sync_threshold to avoid packet check
+ * - (1) sync_refresh_period: reduce sync rate. Additionally, retry
+ * sync_retries times with period of sync_refresh_period/8
+ * - (2) if both sync_refresh_period and sync_period are 0 send sync only
+ * for state changes or only once when pkts matches sync_threshold
+ * - (3) templates: rate can be reduced only with sync_refresh_period or
+ * with (2)
+ */
+static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
+ struct ip_vs_conn *cp, int pkts)
+{
+ unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
+ unsigned long now = jiffies;
+ unsigned long n = (now + cp->timeout) & ~3UL;
+ unsigned int sync_refresh_period;
+ int sync_period;
+ int force;
+
+ /* Check if we sync in current state */
+ if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
+ force = 0;
+ else if (likely(cp->protocol == IPPROTO_TCP)) {
+ if (!((1 << cp->state) &
+ ((1 << IP_VS_TCP_S_ESTABLISHED) |
+ (1 << IP_VS_TCP_S_FIN_WAIT) |
+ (1 << IP_VS_TCP_S_CLOSE) |
+ (1 << IP_VS_TCP_S_CLOSE_WAIT) |
+ (1 << IP_VS_TCP_S_TIME_WAIT))))
+ return 0;
+ force = cp->state != cp->old_state;
+ if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
+ goto set;
+ } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
+ if (!((1 << cp->state) &
+ ((1 << IP_VS_SCTP_S_ESTABLISHED) |
+ (1 << IP_VS_SCTP_S_CLOSED) |
+ (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) |
+ (1 << IP_VS_SCTP_S_SHUT_ACK_SER))))
+ return 0;
+ force = cp->state != cp->old_state;
+ if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
+ goto set;
+ } else {
+ /* UDP or another protocol with single state */
+ force = 0;
+ }
+
+ sync_refresh_period = sysctl_sync_refresh_period(ipvs);
+ if (sync_refresh_period > 0) {
+ long diff = n - orig;
+ long min_diff = max(cp->timeout >> 1, 10UL * HZ);
+
+ /* Avoid sync if difference is below sync_refresh_period
+ * and below the half timeout.
+ */
+ if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
+ int retries = orig & 3;
+
+ if (retries >= sysctl_sync_retries(ipvs))
+ return 0;
+ if (time_before(now, orig - cp->timeout +
+ (sync_refresh_period >> 3)))
+ return 0;
+ n |= retries + 1;
+ }
+ }
+ sync_period = sysctl_sync_period(ipvs);
+ if (sync_period > 0) {
+ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
+ pkts % sync_period != sysctl_sync_threshold(ipvs))
+ return 0;
+ } else if (sync_refresh_period <= 0 &&
+ pkts != sysctl_sync_threshold(ipvs))
+ return 0;
+
+set:
+ cp->old_state = cp->state;
+ n = cmpxchg(&cp->sync_endtime, orig, n);
+ return n == orig || force;
+}
+
/*
* Version 0 , could be switched in by sys_ctl.
* Add an ip_vs_conn information into the current sync_buff.
*/
-void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
+static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
+ int pkts)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg_v0 *m;
struct ip_vs_sync_conn_v0 *s;
+ struct ip_vs_sync_buff *buff;
+ struct ipvs_master_sync_state *ms;
+ int id;
int len;
if (unlikely(cp->af != AF_INET))
@@ -459,21 +528,41 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
return;
+ if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
+ return;
+
spin_lock(&ipvs->sync_buff_lock);
- if (!ipvs->sync_buff) {
- ipvs->sync_buff =
- ip_vs_sync_buff_create_v0(ipvs);
- if (!ipvs->sync_buff) {
+ if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
+ spin_unlock(&ipvs->sync_buff_lock);
+ return;
+ }
+
+ id = select_master_thread_id(ipvs, cp);
+ ms = &ipvs->ms[id];
+ buff = ms->sync_buff;
+ if (buff) {
+ m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
+ /* Send buffer if it is for v1 */
+ if (!m->nr_conns) {
+ sb_queue_tail(ipvs, ms);
+ ms->sync_buff = NULL;
+ buff = NULL;
+ }
+ }
+ if (!buff) {
+ buff = ip_vs_sync_buff_create_v0(ipvs);
+ if (!buff) {
spin_unlock(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
+ ms->sync_buff = buff;
}
len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
SIMPLE_CONN_SIZE;
- m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
- s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
+ m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
+ s = (struct ip_vs_sync_conn_v0 *) buff->head;
/* copy members */
s->reserved = 0;
@@ -494,18 +583,24 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
m->nr_conns++;
m->size += len;
- ipvs->sync_buff->head += len;
+ buff->head += len;
/* check if there is a space for next one */
- if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
- sb_queue_tail(ipvs);
- ipvs->sync_buff = NULL;
+ if (buff->head + FULL_CONN_SIZE > buff->end) {
+ sb_queue_tail(ipvs, ms);
+ ms->sync_buff = NULL;
}
spin_unlock(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */
- if (cp->control)
- ip_vs_sync_conn(net, cp->control);
+ cp = cp->control;
+ if (cp) {
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE)
+ pkts = atomic_add_return(1, &cp->in_pkts);
+ else
+ pkts = sysctl_sync_threshold(ipvs);
+ ip_vs_sync_conn(net, cp->control, pkts);
+ }
}
/*
@@ -513,23 +608,29 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
* Called by ip_vs_in.
* Sending Version 1 messages
*/
-void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m;
union ip_vs_sync_conn *s;
+ struct ip_vs_sync_buff *buff;
+ struct ipvs_master_sync_state *ms;
+ int id;
__u8 *p;
unsigned int len, pe_name_len, pad;
/* Handle old version of the protocol */
if (sysctl_sync_ver(ipvs) == 0) {
- ip_vs_sync_conn_v0(net, cp);
+ ip_vs_sync_conn_v0(net, cp, pkts);
return;
}
/* Do not sync ONE PACKET */
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
goto control;
sloop:
+ if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
+ goto control;
+
/* Sanity checks */
pe_name_len = 0;
if (cp->pe_data_len) {
@@ -541,6 +642,13 @@ sloop:
}
spin_lock(&ipvs->sync_buff_lock);
+ if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
+ spin_unlock(&ipvs->sync_buff_lock);
+ return;
+ }
+
+ id = select_master_thread_id(ipvs, cp);
+ ms = &ipvs->ms[id];
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
@@ -559,27 +667,32 @@ sloop:
/* check if there is a space for this one */
pad = 0;
- if (ipvs->sync_buff) {
- pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
- if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
- sb_queue_tail(ipvs);
- ipvs->sync_buff = NULL;
+ buff = ms->sync_buff;
+ if (buff) {
+ m = buff->mesg;
+ pad = (4 - (size_t) buff->head) & 3;
+ /* Send buffer if it is for v0 */
+ if (buff->head + len + pad > buff->end || m->reserved) {
+ sb_queue_tail(ipvs, ms);
+ ms->sync_buff = NULL;
+ buff = NULL;
pad = 0;
}
}
- if (!ipvs->sync_buff) {
- ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
- if (!ipvs->sync_buff) {
+ if (!buff) {
+ buff = ip_vs_sync_buff_create(ipvs);
+ if (!buff) {
spin_unlock(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
+ ms->sync_buff = buff;
+ m = buff->mesg;
}
- m = ipvs->sync_buff->mesg;
- p = ipvs->sync_buff->head;
- ipvs->sync_buff->head += pad + len;
+ p = buff->head;
+ buff->head += pad + len;
m->size += pad + len;
/* Add ev. padding from prev. sync_conn */
while (pad--)
@@ -644,16 +757,10 @@ control:
cp = cp->control;
if (!cp)
return;
- /*
- * Reduce sync rate for templates
- * i.e only increment in_pkts for Templates.
- */
- if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
- int pkts = atomic_add_return(1, &cp->in_pkts);
-
- if (pkts % sysctl_sync_period(ipvs) != 1)
- return;
- }
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE)
+ pkts = atomic_add_return(1, &cp->in_pkts);
+ else
+ pkts = sysctl_sync_threshold(ipvs);
goto sloop;
}
@@ -731,9 +838,32 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
else
cp = ip_vs_ct_in_get(param);
- if (cp && param->pe_data) /* Free pe_data */
+ if (cp) {
+ /* Free pe_data */
kfree(param->pe_data);
- if (!cp) {
+
+ dest = cp->dest;
+ spin_lock(&cp->lock);
+ if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
+ !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
+ if (flags & IP_VS_CONN_F_INACTIVE) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ } else {
+ atomic_inc(&dest->activeconns);
+ atomic_dec(&dest->inactconns);
+ }
+ }
+ flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
+ flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
+ cp->flags = flags;
+ spin_unlock(&cp->lock);
+ if (!dest) {
+ dest = ip_vs_try_bind_dest(cp);
+ if (dest)
+ atomic_dec(&dest->refcnt);
+ }
+ } else {
/*
* Find the appropriate destination for the connection.
* If it is not found the connection will remain unbound
@@ -742,18 +872,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
param->vport, protocol, fwmark, flags);
- /* Set the approprite ativity flag */
- if (protocol == IPPROTO_TCP) {
- if (state != IP_VS_TCP_S_ESTABLISHED)
- flags |= IP_VS_CONN_F_INACTIVE;
- else
- flags &= ~IP_VS_CONN_F_INACTIVE;
- } else if (protocol == IPPROTO_SCTP) {
- if (state != IP_VS_SCTP_S_ESTABLISHED)
- flags |= IP_VS_CONN_F_INACTIVE;
- else
- flags &= ~IP_VS_CONN_F_INACTIVE;
- }
cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
if (dest)
atomic_dec(&dest->refcnt);
@@ -763,34 +881,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
return;
}
- } else if (!cp->dest) {
- dest = ip_vs_try_bind_dest(cp);
- if (dest)
- atomic_dec(&dest->refcnt);
- } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
- (cp->state != state)) {
- /* update active/inactive flag for the connection */
- dest = cp->dest;
- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state != IP_VS_TCP_S_ESTABLISHED)) {
- atomic_dec(&dest->activeconns);
- atomic_inc(&dest->inactconns);
- cp->flags |= IP_VS_CONN_F_INACTIVE;
- } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state == IP_VS_TCP_S_ESTABLISHED)) {
- atomic_inc(&dest->activeconns);
- atomic_dec(&dest->inactconns);
- cp->flags &= ~IP_VS_CONN_F_INACTIVE;
- }
- } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
- (cp->state != state)) {
- dest = cp->dest;
- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state != IP_VS_SCTP_S_ESTABLISHED)) {
- atomic_dec(&dest->activeconns);
- atomic_inc(&dest->inactconns);
- cp->flags &= ~IP_VS_CONN_F_INACTIVE;
- }
}
if (opt)
@@ -839,7 +929,7 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer,
p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
for (i=0; i<m->nr_conns; i++) {
- unsigned flags, state;
+ unsigned int flags, state;
if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
@@ -1109,7 +1199,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
for (i=0; i<nr_conns; i++) {
union ip_vs_sync_conn *s;
- unsigned size;
+ unsigned int size;
int retc;
p = msg_end;
@@ -1149,6 +1239,28 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
/*
+ * Setup sndbuf (mode=1) or rcvbuf (mode=0)
+ */
+static void set_sock_size(struct sock *sk, int mode, int val)
+{
+ /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */
+ /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */
+ lock_sock(sk);
+ if (mode) {
+ val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
+ sysctl_wmem_max);
+ sk->sk_sndbuf = val * 2;
+ sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+ } else {
+ val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
+ sysctl_rmem_max);
+ sk->sk_rcvbuf = val * 2;
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+ }
+ release_sock(sk);
+}
+
+/*
* Setup loopback of outgoing multicasts on a sending socket
*/
static void set_mcast_loop(struct sock *sk, u_char loop)
@@ -1298,9 +1410,15 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
/*
* Set up sending multicast socket over UDP
*/
-static struct socket *make_send_sock(struct net *net)
+static struct socket *make_send_sock(struct net *net, int id)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ /* multicast addr */
+ struct sockaddr_in mcast_addr = {
+ .sin_family = AF_INET,
+ .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
+ .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
+ };
struct socket *sock;
int result;
@@ -1324,6 +1442,9 @@ static struct socket *make_send_sock(struct net *net)
set_mcast_loop(sock->sk, 0);
set_mcast_ttl(sock->sk, 1);
+ result = sysctl_sync_sock_size(ipvs);
+ if (result > 0)
+ set_sock_size(sock->sk, 1, result);
result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
if (result < 0) {
@@ -1349,9 +1470,15 @@ error:
/*
* Set up receiving multicast socket over UDP
*/
-static struct socket *make_receive_sock(struct net *net)
+static struct socket *make_receive_sock(struct net *net, int id)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ /* multicast addr */
+ struct sockaddr_in mcast_addr = {
+ .sin_family = AF_INET,
+ .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
+ .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
+ };
struct socket *sock;
int result;
@@ -1368,7 +1495,10 @@ static struct socket *make_receive_sock(struct net *net)
*/
sk_change_net(sock->sk, net);
/* it is equivalent to the REUSEADDR option in user-space */
- sock->sk->sk_reuse = 1;
+ sock->sk->sk_reuse = SK_CAN_REUSE;
+ result = sysctl_sync_sock_size(ipvs);
+ if (result > 0)
+ set_sock_size(sock->sk, 0, result);
result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
sizeof(struct sockaddr));
@@ -1411,18 +1541,22 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
return len;
}
-static void
+static int
ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
{
int msize;
+ int ret;
msize = msg->size;
/* Put size in network byte order */
msg->size = htons(msg->size);
- if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
- pr_err("ip_vs_send_async error\n");
+ ret = ip_vs_send_async(sock, (char *)msg, msize);
+ if (ret >= 0 || ret == -EAGAIN)
+ return ret;
+ pr_err("ip_vs_send_async error %d\n", ret);
+ return 0;
}
static int
@@ -1438,48 +1572,90 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
iov.iov_base = buffer;
iov.iov_len = (size_t)buflen;
- len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0);
+ len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
if (len < 0)
- return -1;
+ return len;
LeaveFunction(7);
return len;
}
+/* Wakeup the master thread for sending */
+static void master_wakeup_work_handler(struct work_struct *work)
+{
+ struct ipvs_master_sync_state *ms =
+ container_of(work, struct ipvs_master_sync_state,
+ master_wakeup_work.work);
+ struct netns_ipvs *ipvs = ms->ipvs;
+
+ spin_lock_bh(&ipvs->sync_lock);
+ if (ms->sync_queue_len &&
+ ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
+ ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
+ wake_up_process(ms->master_thread);
+ }
+ spin_unlock_bh(&ipvs->sync_lock);
+}
+
+/* Get next buffer to send */
+static inline struct ip_vs_sync_buff *
+next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
+{
+ struct ip_vs_sync_buff *sb;
+
+ sb = sb_dequeue(ipvs, ms);
+ if (sb)
+ return sb;
+ /* Do not delay entries in buffer for more than 2 seconds */
+ return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
+}
static int sync_thread_master(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
+ struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
+ struct sock *sk = tinfo->sock->sk;
struct ip_vs_sync_buff *sb;
pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
- "syncid = %d\n",
- ipvs->master_mcast_ifn, ipvs->master_syncid);
+ "syncid = %d, id = %d\n",
+ ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);
- while (!kthread_should_stop()) {
- while ((sb = sb_dequeue(ipvs))) {
- ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
- ip_vs_sync_buff_release(sb);
+ for (;;) {
+ sb = next_sync_buff(ipvs, ms);
+ if (unlikely(kthread_should_stop()))
+ break;
+ if (!sb) {
+ schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
+ continue;
}
-
- /* check if entries stay in ipvs->sync_buff for 2 seconds */
- sb = get_curr_sync_buff(ipvs, 2 * HZ);
- if (sb) {
- ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
- ip_vs_sync_buff_release(sb);
+ while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
+ int ret = 0;
+
+ __wait_event_interruptible(*sk_sleep(sk),
+ sock_writeable(sk) ||
+ kthread_should_stop(),
+ ret);
+ if (unlikely(kthread_should_stop()))
+ goto done;
}
-
- schedule_timeout_interruptible(HZ);
+ ip_vs_sync_buff_release(sb);
}
+done:
+ __set_current_state(TASK_RUNNING);
+ if (sb)
+ ip_vs_sync_buff_release(sb);
+
/* clean up the sync_buff queue */
- while ((sb = sb_dequeue(ipvs)))
+ while ((sb = sb_dequeue(ipvs, ms)))
ip_vs_sync_buff_release(sb);
+ __set_current_state(TASK_RUNNING);
/* clean up the current sync_buff */
- sb = get_curr_sync_buff(ipvs, 0);
+ sb = get_curr_sync_buff(ipvs, ms, 0);
if (sb)
ip_vs_sync_buff_release(sb);
@@ -1498,8 +1674,8 @@ static int sync_thread_backup(void *data)
int len;
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
- "syncid = %d\n",
- ipvs->backup_mcast_ifn, ipvs->backup_syncid);
+ "syncid = %d, id = %d\n",
+ ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);
while (!kthread_should_stop()) {
wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -1511,7 +1687,8 @@ static int sync_thread_backup(void *data)
len = ip_vs_receive(tinfo->sock, tinfo->buf,
ipvs->recv_mesg_maxlen);
if (len <= 0) {
- pr_err("receiving message error\n");
+ if (len != -EAGAIN)
+ pr_err("receiving message error\n");
break;
}
@@ -1535,86 +1712,140 @@ static int sync_thread_backup(void *data)
int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
{
struct ip_vs_sync_thread_data *tinfo;
- struct task_struct **realtask, *task;
+ struct task_struct **array = NULL, *task;
struct socket *sock;
struct netns_ipvs *ipvs = net_ipvs(net);
- char *name, *buf = NULL;
+ char *name;
int (*threadfn)(void *data);
+ int id, count;
int result = -ENOMEM;
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
sizeof(struct ip_vs_sync_conn_v0));
+ if (!ipvs->sync_state) {
+ count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
+ ipvs->threads_mask = count - 1;
+ } else
+ count = ipvs->threads_mask + 1;
if (state == IP_VS_STATE_MASTER) {
- if (ipvs->master_thread)
+ if (ipvs->ms)
return -EEXIST;
strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
sizeof(ipvs->master_mcast_ifn));
ipvs->master_syncid = syncid;
- realtask = &ipvs->master_thread;
- name = "ipvs_master:%d";
+ name = "ipvs-m:%d:%d";
threadfn = sync_thread_master;
- sock = make_send_sock(net);
} else if (state == IP_VS_STATE_BACKUP) {
- if (ipvs->backup_thread)
+ if (ipvs->backup_threads)
return -EEXIST;
strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
sizeof(ipvs->backup_mcast_ifn));
ipvs->backup_syncid = syncid;
- realtask = &ipvs->backup_thread;
- name = "ipvs_backup:%d";
+ name = "ipvs-b:%d:%d";
threadfn = sync_thread_backup;
- sock = make_receive_sock(net);
} else {
return -EINVAL;
}
- if (IS_ERR(sock)) {
- result = PTR_ERR(sock);
- goto out;
- }
+ if (state == IP_VS_STATE_MASTER) {
+ struct ipvs_master_sync_state *ms;
- set_sync_mesg_maxlen(net, state);
- if (state == IP_VS_STATE_BACKUP) {
- buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
- if (!buf)
- goto outsocket;
+ ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
+ if (!ipvs->ms)
+ goto out;
+ ms = ipvs->ms;
+ for (id = 0; id < count; id++, ms++) {
+ INIT_LIST_HEAD(&ms->sync_queue);
+ ms->sync_queue_len = 0;
+ ms->sync_queue_delay = 0;
+ INIT_DELAYED_WORK(&ms->master_wakeup_work,
+ master_wakeup_work_handler);
+ ms->ipvs = ipvs;
+ }
+ } else {
+ array = kzalloc(count * sizeof(struct task_struct *),
+ GFP_KERNEL);
+ if (!array)
+ goto out;
}
+ set_sync_mesg_maxlen(net, state);
- tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
- if (!tinfo)
- goto outbuf;
-
- tinfo->net = net;
- tinfo->sock = sock;
- tinfo->buf = buf;
+ tinfo = NULL;
+ for (id = 0; id < count; id++) {
+ if (state == IP_VS_STATE_MASTER)
+ sock = make_send_sock(net, id);
+ else
+ sock = make_receive_sock(net, id);
+ if (IS_ERR(sock)) {
+ result = PTR_ERR(sock);
+ goto outtinfo;
+ }
+ tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+ if (!tinfo)
+ goto outsocket;
+ tinfo->net = net;
+ tinfo->sock = sock;
+ if (state == IP_VS_STATE_BACKUP) {
+ tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
+ GFP_KERNEL);
+ if (!tinfo->buf)
+ goto outtinfo;
+ }
+ tinfo->id = id;
- task = kthread_run(threadfn, tinfo, name, ipvs->gen);
- if (IS_ERR(task)) {
- result = PTR_ERR(task);
- goto outtinfo;
+ task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
+ if (IS_ERR(task)) {
+ result = PTR_ERR(task);
+ goto outtinfo;
+ }
+ tinfo = NULL;
+ if (state == IP_VS_STATE_MASTER)
+ ipvs->ms[id].master_thread = task;
+ else
+ array[id] = task;
}
/* mark as active */
- *realtask = task;
+
+ if (state == IP_VS_STATE_BACKUP)
+ ipvs->backup_threads = array;
+ spin_lock_bh(&ipvs->sync_buff_lock);
ipvs->sync_state |= state;
+ spin_unlock_bh(&ipvs->sync_buff_lock);
/* increase the module use count */
ip_vs_use_count_inc();
return 0;
-outtinfo:
- kfree(tinfo);
-outbuf:
- kfree(buf);
outsocket:
sk_release_kernel(sock->sk);
+
+outtinfo:
+ if (tinfo) {
+ sk_release_kernel(tinfo->sock->sk);
+ kfree(tinfo->buf);
+ kfree(tinfo);
+ }
+ count = id;
+ while (count-- > 0) {
+ if (state == IP_VS_STATE_MASTER)
+ kthread_stop(ipvs->ms[count].master_thread);
+ else
+ kthread_stop(array[count]);
+ }
+ kfree(array);
+
out:
+ if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
+ kfree(ipvs->ms);
+ ipvs->ms = NULL;
+ }
return result;
}
@@ -1622,38 +1853,60 @@ out:
int stop_sync_thread(struct net *net, int state)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ struct task_struct **array;
+ int id;
int retc = -EINVAL;
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
if (state == IP_VS_STATE_MASTER) {
- if (!ipvs->master_thread)
+ if (!ipvs->ms)
return -ESRCH;
- pr_info("stopping master sync thread %d ...\n",
- task_pid_nr(ipvs->master_thread));
-
/*
* The lock synchronizes with sb_queue_tail(), so that we don't
* add sync buffers to the queue, when we are already in
* progress of stopping the master sync daemon.
*/
- spin_lock_bh(&ipvs->sync_lock);
+ spin_lock_bh(&ipvs->sync_buff_lock);
+ spin_lock(&ipvs->sync_lock);
ipvs->sync_state &= ~IP_VS_STATE_MASTER;
- spin_unlock_bh(&ipvs->sync_lock);
- retc = kthread_stop(ipvs->master_thread);
- ipvs->master_thread = NULL;
+ spin_unlock(&ipvs->sync_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
+
+ retc = 0;
+ for (id = ipvs->threads_mask; id >= 0; id--) {
+ struct ipvs_master_sync_state *ms = &ipvs->ms[id];
+ int ret;
+
+ pr_info("stopping master sync thread %d ...\n",
+ task_pid_nr(ms->master_thread));
+ cancel_delayed_work_sync(&ms->master_wakeup_work);
+ ret = kthread_stop(ms->master_thread);
+ if (retc >= 0)
+ retc = ret;
+ }
+ kfree(ipvs->ms);
+ ipvs->ms = NULL;
} else if (state == IP_VS_STATE_BACKUP) {
- if (!ipvs->backup_thread)
+ if (!ipvs->backup_threads)
return -ESRCH;
- pr_info("stopping backup sync thread %d ...\n",
- task_pid_nr(ipvs->backup_thread));
-
ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
- retc = kthread_stop(ipvs->backup_thread);
- ipvs->backup_thread = NULL;
+ array = ipvs->backup_threads;
+ retc = 0;
+ for (id = ipvs->threads_mask; id >= 0; id--) {
+ int ret;
+
+ pr_info("stopping backup sync thread %d ...\n",
+ task_pid_nr(array[id]));
+ ret = kthread_stop(array[id]);
+ if (retc >= 0)
+ retc = ret;
+ }
+ kfree(array);
+ ipvs->backup_threads = NULL;
}
/* decrease the module use count */
@@ -1670,13 +1923,8 @@ int __net_init ip_vs_sync_net_init(struct net *net)
struct netns_ipvs *ipvs = net_ipvs(net);
__mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
- INIT_LIST_HEAD(&ipvs->sync_queue);
spin_lock_init(&ipvs->sync_lock);
spin_lock_init(&ipvs->sync_buff_lock);
-
- ipvs->sync_mcast_addr.sin_family = AF_INET;
- ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
- ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index fd0d4e09876a..231be7dd547a 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -84,7 +84,7 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
/*
* Allocate the mark variable for WRR scheduling
*/
- mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);
+ mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_KERNEL);
if (mark == NULL)
return -ENOMEM;