aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter/ipvs/ip_vs_ctl.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter/ipvs/ip_vs_ctl.c')
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c143
1 files changed, 108 insertions, 35 deletions
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 8d14a1acbc37..988222fff9f0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -24,7 +24,6 @@
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
-#include <linux/swap.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -48,6 +47,8 @@
#include <net/ip_vs.h>
+MODULE_ALIAS_GENL_FAMILY(IPVS_GENL_NAME);
+
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);
@@ -210,6 +211,17 @@ static void update_defense_level(struct netns_ipvs *ipvs)
local_bh_enable();
}
+/* Handler for delayed work for expiring no
+ * destination connections
+ */
+static void expire_nodest_conn_handler(struct work_struct *work)
+{
+ struct netns_ipvs *ipvs;
+
+ ipvs = container_of(work, struct netns_ipvs,
+ expire_nodest_conn_work.work);
+ ip_vs_expire_nodest_conn_flush(ipvs);
+}
/*
* Timer for checking the defense
@@ -224,7 +236,8 @@ static void defense_work_handler(struct work_struct *work)
update_defense_level(ipvs);
if (atomic_read(&ipvs->dropentry))
ip_vs_random_dropentry(ipvs);
- schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+ queue_delayed_work(system_long_wq, &ipvs->defense_work,
+ DEFENSE_TIMER_PERIOD);
}
#endif
@@ -947,8 +960,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
* Create a destination for the given service
*/
static int
-ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
- struct ip_vs_dest **dest_p)
+ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
unsigned int atype, i;
@@ -1008,8 +1020,6 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
spin_lock_init(&dest->stats.lock);
__ip_vs_update_dest(svc, dest, udest, 1);
- *dest_p = dest;
-
LeaveFunction(2);
return 0;
@@ -1083,7 +1093,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/*
* Allocate and initialize the dest structure
*/
- ret = ip_vs_new_dest(svc, udest, &dest);
+ ret = ip_vs_new_dest(svc, udest);
}
LeaveFunction(2);
@@ -1163,6 +1173,12 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
list_add(&dest->t_list, &ipvs->dest_trash);
dest->idle_start = 0;
spin_unlock_bh(&ipvs->dest_trash_lock);
+
+ /* Queue up delayed work to expire all no destination connections.
+ * No-op when CONFIG_SYSCTL is disabled.
+ */
+ if (!cleanup)
+ ip_vs_enqueue_expire_nodest_conns(ipvs);
}
@@ -1272,6 +1288,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
struct ip_vs_scheduler *sched = NULL;
struct ip_vs_pe *pe = NULL;
struct ip_vs_service *svc = NULL;
+ int ret_hooks = -1;
/* increase the module use count */
if (!ip_vs_use_count_inc())
@@ -1313,6 +1330,14 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
}
#endif
+ if ((u->af == AF_INET && !ipvs->num_services) ||
+ (u->af == AF_INET6 && !ipvs->num_services6)) {
+ ret = ip_vs_register_hooks(ipvs, u->af);
+ if (ret < 0)
+ goto out_err;
+ ret_hooks = ret;
+ }
+
svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
if (svc == NULL) {
IP_VS_DBG(1, "%s(): no memory\n", __func__);
@@ -1340,7 +1365,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
svc->port = u->port;
svc->fwmark = u->fwmark;
- svc->flags = u->flags;
+ svc->flags = u->flags & ~IP_VS_SVC_F_HASHED;
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
svc->ipvs = ipvs;
@@ -1374,6 +1399,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
ipvs->num_services++;
+ else if (svc->af == AF_INET6)
+ ipvs->num_services6++;
/* Hash the service into the service table */
ip_vs_svc_hash(svc);
@@ -1385,6 +1412,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
out_err:
+ if (ret_hooks >= 0)
+ ip_vs_unregister_hooks(ipvs, u->af);
if (svc != NULL) {
ip_vs_unbind_scheduler(svc, sched);
ip_vs_service_free(svc);
@@ -1500,9 +1529,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
struct ip_vs_pe *old_pe;
struct netns_ipvs *ipvs = svc->ipvs;
- /* Count only IPv4 services for old get/setsockopt interface */
- if (svc->af == AF_INET)
+ if (svc->af == AF_INET) {
ipvs->num_services--;
+ if (!ipvs->num_services)
+ ip_vs_unregister_hooks(ipvs, svc->af);
+ } else if (svc->af == AF_INET6) {
+ ipvs->num_services6--;
+ if (!ipvs->num_services6)
+ ip_vs_unregister_hooks(ipvs, svc->af);
+ }
ip_vs_stop_estimator(svc->ipvs, &svc->stats);
@@ -1732,11 +1767,9 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
#ifdef CONFIG_SYSCTL
-static int three = 3;
-
static int
proc_do_defense_mode(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
@@ -1763,7 +1796,7 @@ proc_do_defense_mode(struct ctl_table *table, int write,
static int
proc_do_sync_threshold(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = table->data;
int val[2];
@@ -1788,7 +1821,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
static int
proc_do_sync_ports(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = table->data;
int val = *valp;
@@ -1942,7 +1975,7 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &three,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "nat_icmp_send",
@@ -1980,6 +2013,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "run_estimation",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@@ -2414,7 +2453,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
}
static int
-do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len)
{
struct net *net = sock_net(sk);
int ret;
@@ -2438,7 +2477,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
return -EINVAL;
}
- if (copy_from_user(arg, user, len) != 0)
+ if (copy_from_sockptr(arg, ptr, len) != 0)
return -EFAULT;
/* Handle daemons since they have another lock */
@@ -2471,6 +2510,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
/* Set timeout values for (tcp tcpfin udp) */
ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
goto out_unlock;
+ } else if (!len) {
+ /* No more commands with len == 0 below */
+ ret = -EINVAL;
+ goto out_unlock;
}
usvc_compat = (struct ip_vs_service_user *)arg;
@@ -2547,9 +2590,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
break;
case IP_VS_SO_SET_DELDEST:
ret = ip_vs_del_dest(svc, &udest);
- break;
- default:
- ret = -EINVAL;
}
out_unlock:
@@ -2571,7 +2611,7 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
dst->addr = src->addr.ip;
dst->port = src->port;
dst->fwmark = src->fwmark;
- strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
+ strscpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
dst->flags = src->flags;
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
@@ -2765,13 +2805,13 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
mutex_lock(&ipvs->sync_mutex);
if (ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER;
- strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
+ strscpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
sizeof(d[0].mcast_ifn));
d[0].syncid = ipvs->mcfg.syncid;
}
if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
d[1].state = IP_VS_STATE_BACKUP;
- strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
+ strscpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
sizeof(d[1].mcast_ifn));
d[1].syncid = ipvs->bcfg.syncid;
}
@@ -3521,7 +3561,7 @@ static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
- strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+ strscpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
sizeof(c.mcast_ifn));
c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
@@ -3855,7 +3895,7 @@ out:
}
-static const struct genl_ops ip_vs_genl_ops[] = {
+static const struct genl_small_ops ip_vs_genl_ops[] = {
{
.cmd = IPVS_CMD_NEW_SERVICE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -3963,8 +4003,9 @@ static struct genl_family ip_vs_genl_family __ro_after_init = {
.policy = ip_vs_cmd_policy,
.netnsok = true, /* Make ipvsadm to work on netns */
.module = THIS_MODULE,
- .ops = ip_vs_genl_ops,
- .n_ops = ARRAY_SIZE(ip_vs_genl_ops),
+ .small_ops = ip_vs_genl_ops,
+ .n_small_ops = ARRAY_SIZE(ip_vs_genl_ops),
+ .resv_start_op = IPVS_CMD_FLUSH + 1,
};
static int __init ip_vs_genl_register(void)
@@ -4052,6 +4093,13 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+ ipvs->sysctl_run_estimation = 1;
+ tbl[idx++].data = &ipvs->sysctl_run_estimation;
+#ifdef CONFIG_IP_VS_DEBUG
+ /* Global sysctls must be ro in non-init netns */
+ if (!net_eq(net, &init_net))
+ tbl[idx++].mode = 0444;
+#endif
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) {
@@ -4063,7 +4111,12 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_tbl = tbl;
/* Schedule defense work */
INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
- schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+ queue_delayed_work(system_long_wq, &ipvs->defense_work,
+ DEFENSE_TIMER_PERIOD);
+
+ /* Init delayed work for expiring no dest conn */
+ INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work,
+ expire_nodest_conn_handler);
return 0;
}
@@ -4072,6 +4125,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
{
struct net *net = ipvs->net;
+ cancel_delayed_work_sync(&ipvs->expire_nodest_conn_work);
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
@@ -4123,12 +4177,18 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
spin_lock_init(&ipvs->tot_stats.lock);
- proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops,
- sizeof(struct ip_vs_iter));
- proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
- ip_vs_stats_show, NULL);
- proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
- ip_vs_stats_percpu_show, NULL);
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("ip_vs", 0, ipvs->net->proc_net,
+ &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter)))
+ goto err_vs;
+ if (!proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
+ ip_vs_stats_show, NULL))
+ goto err_stats;
+ if (!proc_create_net_single("ip_vs_stats_percpu", 0,
+ ipvs->net->proc_net,
+ ip_vs_stats_percpu_show, NULL))
+ goto err_percpu;
+#endif
if (ip_vs_control_net_init_sysctl(ipvs))
goto err;
@@ -4136,6 +4196,17 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
return 0;
err:
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
+
+err_percpu:
+ remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
+
+err_stats:
+ remove_proc_entry("ip_vs", ipvs->net->proc_net);
+
+err_vs:
+#endif
free_percpu(ipvs->tot_stats.cpustats);
return -ENOMEM;
}
@@ -4144,9 +4215,11 @@ void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
{
ip_vs_trash_cleanup(ipvs);
ip_vs_control_net_cleanup_sysctl(ipvs);
+#ifdef CONFIG_PROC_FS
remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
remove_proc_entry("ip_vs", ipvs->net->proc_net);
+#endif
free_percpu(ipvs->tot_stats.cpustats);
}