From a5959d53d6048a56103ee0ade1eb6f2c0c733b1d Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Fri, 19 Nov 2010 14:25:10 +0100 Subject: IPVS: Handle Scheduling errors. If ip_vs_conn_fill_param_persist return an error to ip_vs_sched_persist, this error must propagate as ignored=-1 to ip_vs_schedule(). Errors from ip_vs_conn_new() in ip_vs_sched_persist() and ip_vs_schedule() should also return *ignored=-1; This patch just relies on the fact that ignored is 1 before calling ip_vs_sched_persist(). Sent from Julian: "The new case when ip_vs_conn_fill_param_persist fails should set *ignored = -1, so that we can use NF_DROP, see below. *ignored = -1 should be also used for ip_vs_conn_new failure in ip_vs_sched_persist() and ip_vs_schedule(). The new negative value should be handled in tcp,udp,sctp" "To summarize: - *ignored = 1: protocol tried to schedule (eg. on SYN), found svc but the svc/scheduler decides that this packet should be accepted with NF_ACCEPT because it must not be scheduled. - *ignored = 0: scheduler can not find destination, so try bypass or return ICMP and then NF_DROP (ip_vs_leave). - *ignored = -1: scheduler tried to schedule but fatal error occurred, eg. ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param failure such as missing Call-ID, ENOMEM on skb_linearize or pe_data. In this case we should return NF_DROP without any attempts to send ICMP with ip_vs_leave." More or less all ideas and input to this patch is work from Julian Anastasov Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_tcp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net/netfilter/ipvs/ip_vs_proto_tcp.c') diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index f6c5200e2146..1cdab12abfef 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -64,12 +64,18 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * incoming connection, and create a connection entry. */ *cpp = ip_vs_schedule(svc, skb, pp, &ignored); - if (!*cpp && !ignored) { - *verdict = ip_vs_leave(svc, skb, pp); + if (!*cpp && ignored <= 0) { + if (!ignored) + *verdict = ip_vs_leave(svc, skb, pp); + else { + ip_vs_service_put(svc); + *verdict = NF_DROP; + } return 0; } ip_vs_service_put(svc); } + /* NF_ACCEPT */ return 1; } -- cgit v1.2.3-59-g8ed1b From fc723250c9cb046cc19833a2b1c4309bbf59ac36 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:43 +0100 Subject: IPVS: netns to services part 1 Services hash tables got netns ptr a hash arg, While Real Servers (rs) has been moved to ipvs struct. Two new inline functions added to get net ptr from skb. Since ip_vs is called from different contexts there is two places to dig for the net ptr skb->dev or skb->sk this is handled in skb_net() and skb_sknet() Global functions, ip_vs_service_get() ip_vs_lookup_real_service() etc have got struct net *net as first param. If possible get net ptr skb etc, - if not &init_net is used at this early stage of patching. ip_vs_ctl.c procfs not ready for netns yet. *v3 Comments by Julian - __ip_vs_service_find and __ip_vs_svc_fwm_find are fast path, net_eq(svc->net, net) so the check is at the end now. - net = skb_net(skb) in ip_vs_out moved after check for skb_dst. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 64 +++++++++- include/net/netns/ip_vs.h | 8 ++ net/netfilter/ipvs/ip_vs_conn.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 4 +- net/netfilter/ipvs/ip_vs_ctl.c | 232 +++++++++++++++++++--------------- net/netfilter/ipvs/ip_vs_proto_sctp.c | 5 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 7 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 5 +- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 9 files changed, 214 insertions(+), 115 deletions(-) (limited to 'net/netfilter/ipvs/ip_vs_proto_tcp.c') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index c1c2ece3ed94..d551e0d8fd9a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -37,6 +37,59 @@ static inline struct netns_ipvs *net_ipvs(struct net* net) { return net->ipvs; } +/* + * Get net ptr from skb in traffic cases + * use skb_sknet when call is from userland (ioctl or netlink) + */ +static inline struct net *skb_net(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_NS +#ifdef CONFIG_IP_VS_DEBUG + /* + * This is used for debug only. + * Start with the most likely hit + * End with BUG + */ + if (likely(skb->dev && skb->dev->nd_net)) + return dev_net(skb->dev); + if (skb_dst(skb)->dev) + return dev_net(skb_dst(skb)->dev); + WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n", + __func__, __LINE__); + if (likely(skb->sk && skb->sk->sk_net)) + return sock_net(skb->sk); + pr_err("There is no net ptr to find in the skb in %s() line:%d\n", + __func__, __LINE__); + BUG(); +#else + return dev_net(skb->dev ? : skb_dst(skb)->dev); +#endif +#else + return &init_net; +#endif +} + +static inline struct net *skb_sknet(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_NS +#ifdef CONFIG_IP_VS_DEBUG + /* Start with the most likely hit */ + if (likely(skb->sk && skb->sk->sk_net)) + return sock_net(skb->sk); + WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n", + __func__, __LINE__); + if (likely(skb->dev && skb->dev->nd_net)) + return dev_net(skb->dev); + pr_err("There is no net ptr to find in the skb in %s() line:%d\n", + __func__, __LINE__); + BUG(); +#else + return sock_net(skb->sk); +#endif +#else + return &init_net; +#endif +} /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; @@ -496,6 +549,7 @@ struct ip_vs_service { unsigned flags; /* service status flags */ unsigned timeout; /* persistent timeout in ticks */ __be32 netmask; /* grouping granularity */ + struct net *net; struct list_head destinations; /* real server d-linked list */ __u32 num_dests; /* number of servers */ @@ -896,7 +950,7 @@ extern int sysctl_ip_vs_sync_ver; extern void ip_vs_sync_switch_mode(int mode); extern struct ip_vs_service * -ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, +ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport); static inline void ip_vs_service_put(struct ip_vs_service *svc) @@ -905,7 +959,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc) } extern struct ip_vs_dest * -ip_vs_lookup_real_service(int af, __u16 protocol, +ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport); extern int ip_vs_use_count_inc(void); @@ -913,9 +967,9 @@ extern void ip_vs_use_count_dec(void); extern int ip_vs_control_init(void); extern void ip_vs_control_cleanup(void); extern struct ip_vs_dest * -ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport, - const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol, - __u32 fwmark); +ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, + __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, + __u16 protocol, __u32 fwmark); extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 12fe84087cec..5b87d22a39fb 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -20,6 +20,14 @@ struct ctl_table_header; struct netns_ipvs { int gen; /* Generation */ + /* + * Hash table: for real service lookups + */ + #define IP_VS_RTAB_BITS 4 + #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) + #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) + + struct list_head rs_table[IP_VS_RTAB_SIZE]; }; #endif /* IP_VS_H_ */ diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 7c1b502f8d8d..7a0e79e3ad0f 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -611,7 +611,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, + dest = ip_vs_find_dest(&init_net, cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark); ip_vs_bind_dest(cp, dest); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 206f40c548d7..d0616ea1eebf 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1031,6 +1031,7 @@ drop: static unsigned int ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) { + struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; @@ -1054,6 +1055,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely(!skb_dst(skb))) return NF_ACCEPT; + net = skb_net(skb); ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { @@ -1119,7 +1121,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(af, iph.protocol, + if (ip_vs_lookup_real_service(net, af, iph.protocol, &iph.saddr, pptr[0])) { /* diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ceeef4352d34..2d7c96bd2114 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -287,15 +287,6 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; /* the service table hashed by fwmark */ static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; -/* - * Hash table: for real service lookups - */ -#define IP_VS_RTAB_BITS 4 -#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) -#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) - -static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE]; - /* * Trash for destinations */ @@ -311,9 +302,9 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); /* * Returns hash value for virtual service */ -static __inline__ unsigned -ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, - __be16 port) +static inline unsigned +ip_vs_svc_hashkey(struct net *net, int af, unsigned proto, + const union nf_inet_addr *addr, __be16 port) { register unsigned porth = ntohs(port); __be32 addr_fold = addr->ip; @@ -323,6 +314,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif + addr_fold ^= ((size_t)net>>8); return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) & IP_VS_SVC_TAB_MASK; @@ -331,13 +323,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, /* * Returns hash value of fwmark for virtual service lookup */ -static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) +static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) { - return fwmark & IP_VS_SVC_TAB_MASK; + return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; } /* - * Hashes a service in the ip_vs_svc_table by + * Hashes a service in the ip_vs_svc_table by * or in the ip_vs_svc_fwm_table by fwmark. * Should be called with locked tables. */ @@ -353,16 +345,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* - * Hash it by in ip_vs_svc_table + * Hash it by in ip_vs_svc_table */ - hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, - svc->port); + hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, + &svc->addr, svc->port); list_add(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* - * Hash it by fwmark in ip_vs_svc_fwm_table + * Hash it by fwmark in svc_fwm_table */ - hash = ip_vs_svc_fwm_hashkey(svc->fwmark); + hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } @@ -374,7 +366,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* - * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. + * Unhashes a service from svc_table / svc_fwm_table. * Should be called with locked tables. */ static int ip_vs_svc_unhash(struct ip_vs_service *svc) @@ -386,10 +378,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) } if (svc->fwmark == 0) { - /* Remove it from the ip_vs_svc_table table */ + /* Remove it from the svc_table table */ list_del(&svc->s_list); } else { - /* Remove it from the ip_vs_svc_fwm_table table */ + /* Remove it from the svc_fwm_table table */ list_del(&svc->f_list); } @@ -400,23 +392,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) /* - * Get service by {proto,addr,port} in the service table. + * Get service by {netns, proto,addr,port} in the service table. */ static inline struct ip_vs_service * -__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, - __be16 vport) +__ip_vs_service_find(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { unsigned hash; struct ip_vs_service *svc; /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); + hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) - && (svc->protocol == protocol)) { + && (svc->protocol == protocol) + && net_eq(svc->net, net)) { /* HIT */ return svc; } @@ -430,16 +423,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, * Get service by {fwmark} in the service table. */ static inline struct ip_vs_service * -__ip_vs_svc_fwm_find(int af, __u32 fwmark) +__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) { unsigned hash; struct ip_vs_service *svc; /* Check for fwmark addressed entries */ - hash = ip_vs_svc_fwm_hashkey(fwmark); + hash = ip_vs_svc_fwm_hashkey(net, fwmark); list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { - if (svc->fwmark == fwmark && svc->af == af) { + if (svc->fwmark == fwmark && svc->af == af + && net_eq(svc->net, net)) { /* HIT */ return svc; } @@ -449,7 +443,7 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark) } struct ip_vs_service * -ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, +ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; @@ -459,14 +453,15 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, /* * Check the table hashed by fwmark first */ - if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark))) + svc = __ip_vs_svc_fwm_find(net, af, fwmark); + if (fwmark && svc) goto out; /* * Check the table hashed by * for "full" addressed entries */ - svc = __ip_vs_service_find(af, protocol, vaddr, vport); + svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP @@ -476,7 +471,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ - svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT); + svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); } if (svc == NULL @@ -484,7 +479,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_find(af, protocol, vaddr, 0); + svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); } out: @@ -545,10 +540,10 @@ static inline unsigned ip_vs_rs_hashkey(int af, } /* - * Hashes ip_vs_dest in ip_vs_rtable by . + * Hashes ip_vs_dest in rs_table by . * should be called with locked tables. */ -static int ip_vs_rs_hash(struct ip_vs_dest *dest) +static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) { unsigned hash; @@ -562,19 +557,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) */ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); - list_add(&dest->d_list, &ip_vs_rtable[hash]); + list_add(&dest->d_list, &ipvs->rs_table[hash]); return 1; } /* - * UNhashes ip_vs_dest from ip_vs_rtable. + * UNhashes ip_vs_dest from rs_table. * should be called with locked tables. */ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) { /* - * Remove it from the ip_vs_rtable table. + * Remove it from the rs_table table. */ if (!list_empty(&dest->d_list)) { list_del(&dest->d_list); @@ -588,10 +583,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Lookup real service by in the real service table. */ struct ip_vs_dest * -ip_vs_lookup_real_service(int af, __u16 protocol, +ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport) { + struct netns_ipvs *ipvs = net_ipvs(net); unsigned hash; struct ip_vs_dest *dest; @@ -602,7 +598,7 @@ ip_vs_lookup_real_service(int af, __u16 protocol, hash = ip_vs_rs_hashkey(af, daddr, dport); read_lock(&__ip_vs_rs_lock); - list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { + list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { if ((dest->af == af) && ip_vs_addr_equal(af, &dest->addr, daddr) && (dest->port == dport) @@ -652,7 +648,8 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * ip_vs_lookup_real_service() looked promissing, but * seems not working as expected. */ -struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, +struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, + const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol, __u32 fwmark) @@ -660,7 +657,7 @@ struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, struct ip_vs_dest *dest; struct ip_vs_service *svc; - svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; dest = ip_vs_lookup_dest(svc, daddr, dport); @@ -768,6 +765,7 @@ static void __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { + struct netns_ipvs *ipvs = net_ipvs(svc->net); int conn_flags; /* set the weight and the flags */ @@ -780,11 +778,11 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, conn_flags |= IP_VS_CONN_F_NOOUTPUT; } else { /* - * Put the real service in ip_vs_rtable if not present. + * Put the real service in rs_table if not present. * For now only for NAT! */ write_lock_bh(&__ip_vs_rs_lock); - ip_vs_rs_hash(dest); + ip_vs_rs_hash(ipvs, dest); write_unlock_bh(&__ip_vs_rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -1117,7 +1115,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) * Add a service into the service hash table */ static int -ip_vs_add_service(struct ip_vs_service_user_kern *u, +ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { int ret = 0; @@ -1172,6 +1170,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, svc->flags = u->flags; svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; + svc->net = net; INIT_LIST_HEAD(&svc->destinations); rwlock_init(&svc->sched_lock); @@ -1428,17 +1427,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(void) +static int ip_vs_flush(struct net *net) { int idx; struct ip_vs_service *svc, *nxt; /* - * Flush the service table hashed by + * Flush the service table hashed by */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { - ip_vs_unlink_service(svc); + list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], + s_list) { + if (net_eq(svc->net, net)) + ip_vs_unlink_service(svc); } } @@ -1448,7 +1449,8 @@ static int ip_vs_flush(void) for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry_safe(svc, nxt, &ip_vs_svc_fwm_table[idx], f_list) { - ip_vs_unlink_service(svc); + if (net_eq(svc->net, net)) + ip_vs_unlink_service(svc); } } @@ -1472,20 +1474,22 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) return 0; } -static int ip_vs_zero_all(void) +static int ip_vs_zero_all(struct net *net) { int idx; struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - ip_vs_zero_service(svc); + if (net_eq(svc->net, net)) + ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - ip_vs_zero_service(svc); + if (net_eq(svc->net, net)) + ip_vs_zero_service(svc); } } @@ -1763,6 +1767,7 @@ static struct ctl_table_header * sysctl_header; #ifdef CONFIG_PROC_FS struct ip_vs_iter { + struct seq_net_private p; /* Do not move this, netns depends upon it*/ struct list_head *table; int bucket; }; @@ -1789,6 +1794,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags) /* Get the Nth entry in the two lists */ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) { + struct net *net = seq_file_net(seq); struct ip_vs_iter *iter = seq->private; int idx; struct ip_vs_service *svc; @@ -1796,7 +1802,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (pos-- == 0){ + if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = idx; return svc; @@ -1807,7 +1813,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* keep looking in fwmark */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (pos-- == 0) { + if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; return svc; @@ -1961,7 +1967,7 @@ static const struct seq_operations ip_vs_info_seq_ops = { static int ip_vs_info_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ip_vs_info_seq_ops, + return seq_open_net(inode, file, &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter)); } @@ -2011,7 +2017,7 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) { - return single_open(file, ip_vs_stats_show, NULL); + return single_open_net(inode, file, ip_vs_stats_show); } static const struct file_operations ip_vs_stats_fops = { @@ -2113,6 +2119,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, static int do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { + struct net *net = sock_net(sk); int ret; unsigned char arg[MAX_ARG_LEN]; struct ip_vs_service_user *usvc_compat; @@ -2147,7 +2154,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(); + ret = ip_vs_flush(net); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ @@ -2174,7 +2181,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { - ret = ip_vs_zero_all(); + ret = ip_vs_zero_all(net); goto out_unlock; } } @@ -2191,10 +2198,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Lookup the exact service by or fwmark */ if (usvc.fwmark == 0) - svc = __ip_vs_service_find(usvc.af, usvc.protocol, + svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else - svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark); + svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); if (cmd != IP_VS_SO_SET_ADD && (svc == NULL || svc->protocol != usvc.protocol)) { @@ -2207,7 +2214,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (svc != NULL) ret = -EEXIST; else - ret = ip_vs_add_service(&usvc, &svc); + ret = ip_vs_add_service(net, &usvc, &svc); break; case IP_VS_SO_SET_EDIT: ret = ip_vs_edit_service(svc, &usvc); @@ -2267,7 +2274,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) } static inline int -__ip_vs_get_service_entries(const struct ip_vs_get_services *get, +__ip_vs_get_service_entries(struct net *net, + const struct ip_vs_get_services *get, struct ip_vs_get_services __user *uptr) { int idx, count=0; @@ -2278,7 +2286,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET) + if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; if (count >= get->num_services) @@ -2297,7 +2305,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET) + if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; if (count >= get->num_services) @@ -2317,7 +2325,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, } static inline int -__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, +__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, struct ip_vs_get_dests __user *uptr) { struct ip_vs_service *svc; @@ -2325,9 +2333,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, int ret = 0; if (get->fwmark) - svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark); + svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); else - svc = __ip_vs_service_find(AF_INET, get->protocol, &addr, + svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, get->port); if (svc) { @@ -2401,7 +2409,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) unsigned char arg[128]; int ret = 0; unsigned int copylen; + struct net *net = sock_net(sk); + BUG_ON(!net); if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2463,7 +2473,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_service_entries(get, user); + ret = __ip_vs_get_service_entries(net, get, user); } break; @@ -2476,10 +2486,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) entry = (struct ip_vs_service_entry *)arg; addr.ip = entry->addr; if (entry->fwmark) - svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark); + svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); else - svc = __ip_vs_service_find(AF_INET, entry->protocol, - &addr, entry->port); + svc = __ip_vs_service_find(net, AF_INET, + entry->protocol, &addr, + entry->port); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2502,7 +2513,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_dest_entries(get, user); + ret = __ip_vs_get_dest_entries(net, get, user); } break; @@ -2722,11 +2733,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, int idx = 0, i; int start = cb->args[0]; struct ip_vs_service *svc; + struct net *net = skb_sknet(skb); mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { - if (++idx <= start) + if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -2737,7 +2749,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { - if (++idx <= start) + if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -2753,7 +2765,8 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, +static int ip_vs_genl_parse_service(struct net *net, + struct ip_vs_service_user_kern *usvc, struct nlattr *nla, int full_entry, struct ip_vs_service **ret_svc) { @@ -2796,9 +2809,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, } if (usvc->fwmark) - svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark); + svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); else - svc = __ip_vs_service_find(usvc->af, usvc->protocol, + svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, &usvc->addr, usvc->port); *ret_svc = svc; @@ -2835,13 +2848,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, return 0; } -static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) +static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, + struct nlattr *nla) { struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; int ret; - ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc); + ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); return ret ? ERR_PTR(ret) : svc; } @@ -2909,6 +2923,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct ip_vs_service *svc; struct ip_vs_dest *dest; struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; + struct net *net; mutex_lock(&__ip_vs_mutex); @@ -2917,7 +2932,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) goto out_err; - svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); + net = skb_sknet(skb); + svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc) || svc == NULL) goto out_err; @@ -3102,13 +3118,15 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) struct ip_vs_dest_user_kern udest; int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; + struct net *net; + net = skb_sknet(skb); cmd = info->genlhdr->cmd; mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(); + ret = ip_vs_flush(net); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { ret = ip_vs_genl_set_config(info->attrs); @@ -3133,7 +3151,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) goto out; } else if (cmd == IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { - ret = ip_vs_zero_all(); + ret = ip_vs_zero_all(net); goto out; } @@ -3143,7 +3161,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) need_full_svc = 1; - ret = ip_vs_genl_parse_service(&usvc, + ret = ip_vs_genl_parse_service(net, &usvc, info->attrs[IPVS_CMD_ATTR_SERVICE], need_full_svc, &svc); if (ret) @@ -3173,7 +3191,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) switch (cmd) { case IPVS_CMD_NEW_SERVICE: if (svc == NULL) - ret = ip_vs_add_service(&usvc, &svc); + ret = ip_vs_add_service(net, &usvc, &svc); else ret = -EEXIST; break; @@ -3211,7 +3229,9 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; void *reply; int ret, cmd, reply_cmd; + struct net *net; + net = skb_sknet(skb); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_GET_SERVICE) @@ -3240,7 +3260,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_service *svc; - svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); + svc = ip_vs_genl_find_service(net, + info->attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc)) { ret = PTR_ERR(svc); goto out_err; @@ -3411,9 +3432,15 @@ static void ip_vs_genl_unregister(void) */ int __net_init __ip_vs_control_init(struct net *net) { + int idx; + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) + INIT_LIST_HEAD(&ipvs->rs_table[idx]); + proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, @@ -3445,43 +3472,48 @@ static struct pernet_operations ipvs_control_ops = { int __init ip_vs_control_init(void) { - int ret; int idx; + int ret; EnterFunction(2); - /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ + /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { INIT_LIST_HEAD(&ip_vs_svc_table[idx]); INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); } - for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_rtable[idx]); + + ret = register_pernet_subsys(&ipvs_control_ops); + if (ret) { + pr_err("cannot register namespace.\n"); + goto err; } - smp_wmb(); + + smp_wmb(); /* Do we really need it now ? */ ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); - return ret; + goto err_net; } ret = ip_vs_genl_register(); if (ret) { pr_err("cannot register Generic Netlink interface.\n"); nf_unregister_sockopt(&ip_vs_sockopts); - return ret; + goto err_net; } - ret = register_pernet_subsys(&ipvs_control_ops); - if (ret) - return ret; - /* Hook the defense timer */ schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); LeaveFunction(2); return 0; + +err_net: + unregister_pernet_subsys(&ipvs_control_ops); +err: + return ret; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index a315159983ad..521b827083fe 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -12,6 +12,7 @@ static int sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; @@ -27,9 +28,9 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, sizeof(_schunkh), &_schunkh); if (sch == NULL) return 0; - + net = skb_net(skb); if ((sch->type == SCTP_CID_INIT) && - (svc = ip_vs_service_get(af, skb->mark, iph.protocol, + (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, &iph.daddr, sh->dest))) { int ignored; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 1cdab12abfef..c175d3166263 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -31,6 +31,7 @@ static int tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; struct ip_vs_iphdr iph; @@ -42,11 +43,11 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, *verdict = NF_DROP; return 0; } - + net = skb_net(skb); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ if (th->syn && - (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, - th->dest))) { + (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, + &iph.daddr, th->dest))) { int ignored; if (ip_vs_todrop()) { diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index cd398de010cc..5ab54f648654 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -31,6 +31,7 @@ static int udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; struct udphdr _udph, *uh; struct ip_vs_iphdr iph; @@ -42,8 +43,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, *verdict = NF_DROP; return 0; } - - svc = ip_vs_service_get(af, skb->mark, iph.protocol, + net = skb_net(skb); + svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, &iph.daddr, uh->dest); if (svc) { int ignored; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3668739a6d06..662aa2c22a05 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -749,7 +749,7 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, * If it is not found the connection will remain unbound * but still handled. */ - dest = ip_vs_find_dest(type, daddr, dport, param->vaddr, + dest = ip_vs_find_dest(&init_net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark); /* Set the approprite ativity flag */ -- cgit v1.2.3-59-g8ed1b From 4a85b96c08ef84076f84e87280223a4301988ed9 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:47 +0100 Subject: IPVS: netns preparation for proto_tcp In this phase (one), all local vars will be moved to ipvs struct. Remaining work, add param struct net *net to a couple of functions that is common for all protos and use all ip_vs_proto_data *v3 Removed unused function as sugested by Simon Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- include/net/netns/ip_vs.h | 8 +++ net/netfilter/ipvs/ip_vs_ftp.c | 8 ++- net/netfilter/ipvs/ip_vs_proto.c | 13 ++++- net/netfilter/ipvs/ip_vs_proto_tcp.c | 97 +++++++++++++++++++----------------- 5 files changed, 79 insertions(+), 49 deletions(-) (limited to 'net/netfilter/ipvs/ip_vs_proto_tcp.c') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 88d4e40b538a..3c45a00cdc3e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -807,7 +807,7 @@ extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); extern const char * ip_vs_state_name(__u16 proto, int state); -extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); +extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp); extern int ip_vs_check_template(struct ip_vs_conn *ct); extern void ip_vs_random_dropentry(void); extern int ip_vs_conn_init(void); diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 6f4e089b8db2..ac77363647ab 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -31,6 +31,14 @@ struct netns_ipvs { /* ip_vs_proto */ #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; + /* ip_vs_proto_tcp */ +#ifdef CONFIG_IP_VS_PROTO_TCP + #define TCP_APP_TAB_BITS 4 + #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) + #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) + struct list_head tcp_apps[TCP_APP_TAB_SIZE]; + spinlock_t tcp_app_lock; +#endif /* ip_vs_lblc */ int sysctl_lblc_expiration; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 0e762f322aa3..b38ae941f677 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, int ret = 0; enum ip_conntrack_info ctinfo; struct nf_conn *ct; + struct net *net; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -257,8 +258,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * would be adjusted twice. */ + net = skb_net(skb); cp->app_data = NULL; - ip_vs_tcp_conn_listen(n_cp); + ip_vs_tcp_conn_listen(net, n_cp); ip_vs_conn_put(n_cp); return ret; } @@ -287,6 +289,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; + struct net *net; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -378,7 +381,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Move tunnel to listen state */ - ip_vs_tcp_conn_listen(n_cp); + net = skb_net(skb); + ip_vs_tcp_conn_listen(net, n_cp); ip_vs_conn_put(n_cp); return 1; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 576e29648c53..320c6a65f370 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -307,12 +307,23 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, */ static int __net_init __ip_vs_protocol_init(struct net *net) { +#ifdef CONFIG_IP_VS_PROTO_TCP + register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); +#endif return 0; } static void __net_exit __ip_vs_protocol_cleanup(struct net *net) { - /* empty */ + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd; + int i; + + /* unregister all the ipvs proto data for this netns */ + for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { + while ((pd = ipvs->proto_data_table[i]) != NULL) + unregister_ip_vs_proto_netns(net, pd); + } } static struct pernet_operations ipvs_proto_ops = { diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index c175d3166263..9d9df3d61093 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -9,8 +9,12 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Changes: + * Changes: Hans Schillstrom * + * Network name space (netns) aware. + * Global data moved to netns i.e struct netns_ipvs + * tcp_timeouts table has copy per netns in a hash table per + * protocol ip_vs_proto_data and is handled by netns */ #define KMSG_COMPONENT "IPVS" @@ -345,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = { /* * Timeout table[state] */ -static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { +static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_NONE] = 2*HZ, [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, @@ -460,13 +464,6 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) tcp_state_table = (on? tcp_states_dos : tcp_states); } -static int -tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) -{ - return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST, - tcp_state_name_table, sname, to); -} - static inline int tcp_state_idx(struct tcphdr *th) { if (th->rst) @@ -487,6 +484,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int state_idx; int new_state = IP_VS_TCP_S_CLOSE; int state_off = tcp_state_off[direction]; + struct ip_vs_proto_data *pd; /* Temp fix */ /* * Update state offset to INPUT_ONLY if necessary @@ -542,10 +540,13 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } - cp->timeout = pp->timeout_table[cp->state = new_state]; + pd = ip_vs_proto_data_get(&init_net, pp->protocol); + if (likely(pd)) + cp->timeout = pd->timeout_table[cp->state = new_state]; + else /* What to do ? */ + cp->timeout = tcp_timeouts[cp->state = new_state]; } - /* * Handle state transitions */ @@ -573,17 +574,6 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, return 1; } - -/* - * Hash table for TCP application incarnations - */ -#define TCP_APP_TAB_BITS 4 -#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) -#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) - -static struct list_head tcp_apps[TCP_APP_TAB_SIZE]; -static DEFINE_SPINLOCK(tcp_app_lock); - static inline __u16 tcp_app_hashkey(__be16 port) { return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) @@ -597,21 +587,23 @@ static int tcp_register_app(struct ip_vs_app *inc) __u16 hash; __be16 port = inc->port; int ret = 0; + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); hash = tcp_app_hashkey(port); - spin_lock_bh(&tcp_app_lock); - list_for_each_entry(i, &tcp_apps[hash], p_list) { + spin_lock_bh(&ipvs->tcp_app_lock); + list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &tcp_apps[hash]); - atomic_inc(&ip_vs_protocol_tcp.appcnt); + list_add(&inc->p_list, &ipvs->tcp_apps[hash]); + atomic_inc(&pd->pp->appcnt); out: - spin_unlock_bh(&tcp_app_lock); + spin_unlock_bh(&ipvs->tcp_app_lock); return ret; } @@ -619,16 +611,20 @@ static int tcp_register_app(struct ip_vs_app *inc) static void tcp_unregister_app(struct ip_vs_app *inc) { - spin_lock_bh(&tcp_app_lock); - atomic_dec(&ip_vs_protocol_tcp.appcnt); + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); + + spin_lock_bh(&ipvs->tcp_app_lock); + atomic_dec(&pd->pp->appcnt); list_del(&inc->p_list); - spin_unlock_bh(&tcp_app_lock); + spin_unlock_bh(&ipvs->tcp_app_lock); } static int tcp_app_conn_bind(struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(&init_net); int hash; struct ip_vs_app *inc; int result = 0; @@ -640,12 +636,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = tcp_app_hashkey(cp->vport); - spin_lock(&tcp_app_lock); - list_for_each_entry(inc, &tcp_apps[hash], p_list) { + spin_lock(&ipvs->tcp_app_lock); + list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&tcp_app_lock); + spin_unlock(&ipvs->tcp_app_lock); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -662,7 +658,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&tcp_app_lock); + spin_unlock(&ipvs->tcp_app_lock); out: return result; @@ -672,24 +668,34 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* * Set LISTEN timeout. (ip_vs_conn_put will setup timer) */ -void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) +void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) { + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + spin_lock(&cp->lock); cp->state = IP_VS_TCP_S_LISTEN; - cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; + cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] + : tcp_timeouts[IP_VS_TCP_S_LISTEN]); spin_unlock(&cp->lock); } - -static void ip_vs_tcp_init(struct ip_vs_protocol *pp) +/* --------------------------------------------- + * timeouts is netns related now. + * --------------------------------------------- + */ +static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) { - IP_VS_INIT_HASH_TABLE(tcp_apps); - pp->timeout_table = tcp_timeouts; -} + struct netns_ipvs *ipvs = net_ipvs(net); + ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); + spin_lock_init(&ipvs->tcp_app_lock); + pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, + sizeof(tcp_timeouts)); +} -static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) +static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) { + kfree(pd->timeout_table); } @@ -699,8 +705,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .num_states = IP_VS_TCP_S_LAST, .dont_defrag = 0, .appcnt = ATOMIC_INIT(0), - .init = ip_vs_tcp_init, - .exit = ip_vs_tcp_exit, + .init = NULL, + .exit = NULL, + .init_netns = __ip_vs_tcp_init, + .exit_netns = __ip_vs_tcp_exit, .register_app = tcp_register_app, .unregister_app = tcp_unregister_app, .conn_schedule = tcp_conn_schedule, @@ -714,5 +722,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .app_conn_bind = tcp_app_conn_bind, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = tcp_timeout_change, - .set_state_timeout = tcp_set_state_timeout, }; -- cgit v1.2.3-59-g8ed1b From 9330419d9aa4f97df412ac9be9fc0388c67dd315 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:51 +0100 Subject: IPVS: netns, use ip_vs_proto_data as param. ip_vs_protocol *pp is replaced by ip_vs_proto_data *pd in function call in ip_vs_protocol struct i.e. :, - timeout_change() - state_transition() ip_vs_protocol_timeout_change() got ipvs as param, due to above and a upcoming patch - defence work Most of this changes are triggered by Julians comment: "tcp_timeout_change should work with the new struct ip_vs_proto_data so that tcp_state_table will go to pd->state_table and set_tcp_state will get pd instead of pp" *v3 Mostly comments from Julian The pp -> pd conversion should start from functions like ip_vs_out() that use pp = ip_vs_proto_get(iph.protocol), now they should use ip_vs_proto_data_get(net, iph.protocol). conn_in_get() and conn_out_get() unused param *pp, removed. *v4 ip_vs_protocol_timeout_change() walk the proto_data path. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 18 +++----- net/netfilter/ipvs/ip_vs_conn.c | 2 - net/netfilter/ipvs/ip_vs_core.c | 77 ++++++++++++++++++++------------- net/netfilter/ipvs/ip_vs_ctl.c | 55 ++++++++++++++--------- net/netfilter/ipvs/ip_vs_proto.c | 21 ++++++--- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 10 ++--- net/netfilter/ipvs/ip_vs_proto_sctp.c | 16 +++---- net/netfilter/ipvs/ip_vs_proto_tcp.c | 27 +++++------- net/netfilter/ipvs/ip_vs_proto_udp.c | 11 ++--- net/netfilter/xt_ipvs.c | 2 +- 10 files changed, 129 insertions(+), 110 deletions(-) (limited to 'net/netfilter/ipvs/ip_vs_proto_tcp.c') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3c45a00cdc3e..464ea365ca07 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -372,13 +372,12 @@ struct ip_vs_protocol { void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd); int (*conn_schedule)(int af, struct sk_buff *skb, - struct ip_vs_protocol *pp, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp); struct ip_vs_conn * (*conn_in_get)(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); @@ -386,7 +385,6 @@ struct ip_vs_protocol { struct ip_vs_conn * (*conn_out_get)(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); @@ -404,7 +402,7 @@ struct ip_vs_protocol { int (*state_transition)(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp); + struct ip_vs_proto_data *pd); int (*register_app)(struct ip_vs_app *inc); @@ -417,9 +415,7 @@ struct ip_vs_protocol { int offset, const char *msg); - void (*timeout_change)(struct ip_vs_protocol *pp, int flags); - - int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to); + void (*timeout_change)(struct ip_vs_proto_data *pd, int flags); }; /* @@ -778,7 +774,6 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); @@ -786,7 +781,6 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); @@ -917,7 +911,7 @@ static inline void ip_vs_pe_put(const struct ip_vs_pe *pe) */ extern int ip_vs_protocol_init(void); extern void ip_vs_protocol_cleanup(void); -extern void ip_vs_protocol_timeout_change(int flags); +extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags); extern int *ip_vs_create_timeout_table(int *table, int size); extern int ip_vs_set_state_timeout(int *table, int num, const char *const *names, @@ -947,9 +941,9 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp, int *ignored); + struct ip_vs_proto_data *pd, int *ignored); extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp); + struct ip_vs_proto_data *pd); /* diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 7a0e79e3ad0f..a7aba6a4697e 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -329,7 +329,6 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { @@ -428,7 +427,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d0616ea1eebf..9317affc5ea1 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -177,11 +177,11 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) static inline int ip_vs_set_state(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { - if (unlikely(!pp->state_transition)) + if (unlikely(!pd->pp->state_transition)) return 0; - return pp->state_transition(cp, direction, skb, pp); + return pd->pp->state_transition(cp, direction, skb, pd); } static inline int @@ -378,8 +378,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp, int *ignored) + struct ip_vs_proto_data *pd, int *ignored) { + struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; @@ -408,7 +409,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * Do not schedule replies from local real server. */ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) { + (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, "Not scheduling reply for existing connection"); __ip_vs_conn_put(cp); @@ -479,11 +480,12 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * no destination is available for a new connection. */ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; int unicast; + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); @@ -530,10 +532,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_in_stats(cp, skb); /* set state */ - cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); + cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); /* transmit the first SYN packet */ - ret = cp->packet_xmit(skb, cp, pp); + ret = cp->packet_xmit(skb, cp, pd->pp); /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); @@ -840,7 +842,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); if (!cp) return NF_ACCEPT; @@ -917,7 +919,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ip_vs_fill_iphdr(AF_INET6, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); if (!cp) return NF_ACCEPT; @@ -956,9 +958,11 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) * Used for NAT and local client. */ static unsigned int -handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int ihl) { + struct ip_vs_protocol *pp = pd->pp; + IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); if (!skb_make_writable(skb, ihl)) @@ -1007,7 +1011,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); ip_vs_out_stats(cp, skb); - ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); skb->ipvs_property = 1; if (!(cp->flags & IP_VS_CONN_F_NFCT)) ip_vs_notrack(skb); @@ -1034,6 +1038,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; EnterFunction(11); @@ -1079,9 +1084,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } - pp = ip_vs_proto_get(iph.protocol); - if (unlikely(!pp)) + pd = ip_vs_proto_data_get(net, iph.protocol); + if (unlikely(!pd)) return NF_ACCEPT; + pp = pd->pp; /* reassemble IP fragments */ #ifdef CONFIG_IP_VS_IPV6 @@ -1107,10 +1113,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); + cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); if (likely(cp)) - return handle_response(af, skb, pp, cp, iph.len); + return handle_response(af, skb, pd, cp, iph.len); if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || @@ -1236,12 +1242,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, static int ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) { + struct net *net = NULL; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; unsigned int offset, ihl, verdict; union nf_inet_addr snet; @@ -1283,9 +1291,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = ip_vs_proto_get(cih->protocol); - if (!pp) + net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->protocol); + if (!pd) return NF_ACCEPT; + pp = pd->pp; /* Is the embedded protocol header present? */ if (unlikely(cih->frag_off & htons(IP_OFFSET) && @@ -1299,10 +1309,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); if (!cp) { /* The packet could also belong to a local client */ - cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); if (cp) { snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, @@ -1346,6 +1356,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) { + struct net *net = NULL; struct ipv6hdr *iph; struct icmp6hdr _icmph, *ic; struct ipv6hdr _ciph, *cih; /* The ip header contained @@ -1353,6 +1364,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; unsigned int offset, verdict; union nf_inet_addr snet; struct rt6_info *rt; @@ -1395,9 +1407,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = ip_vs_proto_get(cih->nexthdr); - if (!pp) + net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->nexthdr); + if (!pd) return NF_ACCEPT; + pp = pd->pp; /* Is the embedded protocol header present? */ /* TODO: we don't support fragmentation at the moment anyways */ @@ -1411,10 +1425,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) ip_vs_fill_iphdr(AF_INET6, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1); if (!cp) { /* The packet could also belong to a local client */ - cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); if (cp) { ipv6_addr_copy(&snet.in6, &iph->saddr); return handle_response_icmp(AF_INET6, skb, &snet, @@ -1457,8 +1471,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) static unsigned int ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) { + struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, restart, pkts; @@ -1514,20 +1530,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } + net = skb_net(skb); /* Protocol supported? */ - pp = ip_vs_proto_get(iph.protocol); - if (unlikely(!pp)) + pd = ip_vs_proto_data_get(net, iph.protocol); + if (unlikely(!pd)) return NF_ACCEPT; - + pp = pd->pp; /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); + cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); if (unlikely(!cp)) { int v; - if (!pp->conn_schedule(af, skb, pp, &v, &cp)) + if (!pp->conn_schedule(af, skb, pd, &v, &cp)) return v; } @@ -1555,7 +1572,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } ip_vs_in_stats(cp, skb); - restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); + restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); if (cp->packet_xmit) ret = cp->packet_xmit(skb, cp, pp); /* do not touch skb anymore */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2d7c96bd2114..88474f1e828a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -38,6 +38,7 @@ #include #include +#include #include #ifdef CONFIG_IP_VS_IPV6 #include @@ -125,7 +126,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) * update_defense_level is called from keventd and from sysctl, * so it needs to protect itself from softirqs */ -static void update_defense_level(void) +static void update_defense_level(struct netns_ipvs *ipvs) { struct sysinfo i; static int old_secure_tcp = 0; @@ -239,7 +240,8 @@ static void update_defense_level(void) } old_secure_tcp = sysctl_ip_vs_secure_tcp; if (to_change >= 0) - ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); + ip_vs_protocol_timeout_change(ipvs, + sysctl_ip_vs_secure_tcp > 1); spin_unlock(&ip_vs_securetcp_lock); local_bh_enable(); @@ -255,7 +257,10 @@ static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); static void defense_work_handler(struct work_struct *work) { - update_defense_level(); + struct net *net = &init_net; + struct netns_ipvs *ipvs = net_ipvs(net); + + update_defense_level(ipvs); if (atomic_read(&ip_vs_dropentry)) ip_vs_random_dropentry(); @@ -1502,6 +1507,7 @@ static int proc_do_defense_mode(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = current->nsproxy->net_ns; int *valp = table->data; int val = *valp; int rc; @@ -1512,7 +1518,7 @@ proc_do_defense_mode(ctl_table *table, int write, /* Restore the correct value */ *valp = val; } else { - update_defense_level(); + update_defense_level(net_ipvs(net)); } } return rc; @@ -2033,8 +2039,10 @@ static const struct file_operations ip_vs_stats_fops = { /* * Set timeout values for tcp tcpfin udp in the timeout_table. */ -static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) +static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) { + struct ip_vs_proto_data *pd; + IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", u->tcp_timeout, u->tcp_fin_timeout, @@ -2042,19 +2050,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] = u->tcp_timeout * HZ; } if (u->tcp_fin_timeout) { - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] = u->tcp_fin_timeout * HZ; } #endif #ifdef CONFIG_IP_VS_PROTO_UDP if (u->udp_timeout) { - ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] + pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + pd->timeout_table[IP_VS_UDP_S_NORMAL] = u->udp_timeout * HZ; } #endif @@ -2158,7 +2169,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ - ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); + ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); goto out_unlock; } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; @@ -2370,17 +2381,19 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, } static inline void -__ip_vs_get_timeouts(struct ip_vs_timeout_user *u) +__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) { + struct ip_vs_proto_data *pd; + #ifdef CONFIG_IP_VS_PROTO_TCP - u->tcp_timeout = - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; - u->tcp_fin_timeout = - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; + u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; #endif #ifdef CONFIG_IP_VS_PROTO_UDP + pd = ip_vs_proto_data_get(net, IPPROTO_UDP); u->udp_timeout = - ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; + pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; #endif } @@ -2521,7 +2534,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; } @@ -3092,11 +3105,11 @@ static int ip_vs_genl_del_daemon(struct nlattr **attrs) return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); } -static int ip_vs_genl_set_config(struct nlattr **attrs) +static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); @@ -3108,7 +3121,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs) if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); - return ip_vs_set_timeout(&t); + return ip_vs_set_timeout(net, &t); } static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) @@ -3129,7 +3142,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ret = ip_vs_flush(net); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { - ret = ip_vs_genl_set_config(info->attrs); + ret = ip_vs_genl_set_config(net, info->attrs); goto out; } else if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { @@ -3281,7 +3294,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); #ifdef CONFIG_IP_VS_PROTO_TCP NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 9f609d4d5d58..6ac986cdcff3 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -152,9 +152,8 @@ EXPORT_SYMBOL(ip_vs_proto_get); * get ip_vs_protocol object data by netns and proto */ struct ip_vs_proto_data * -ip_vs_proto_data_get(struct net *net, unsigned short proto) +__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; unsigned hash = IP_VS_PROTO_HASH(proto); @@ -165,20 +164,28 @@ ip_vs_proto_data_get(struct net *net, unsigned short proto) return NULL; } + +struct ip_vs_proto_data * +ip_vs_proto_data_get(struct net *net, unsigned short proto) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + return __ipvs_proto_data_get(ipvs, proto); +} EXPORT_SYMBOL(ip_vs_proto_data_get); /* * Propagate event for state change to all protocols */ -void ip_vs_protocol_timeout_change(int flags) +void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) { - struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; int i; for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { - for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) { - if (pp->timeout_change) - pp->timeout_change(pp, flags); + for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { + if (pd->pp->timeout_change) + pd->pp->timeout_change(pd, flags); } } } diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index b8b37fafc988..28039cbfcff4 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -55,7 +55,7 @@ ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, } static struct ip_vs_conn * -ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, +ah_esp_conn_in_get(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { @@ -72,7 +72,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " "%s%s %s->%s\n", inverse ? "ICMP+" : "", - pp->name, + ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); } @@ -83,7 +83,6 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, static struct ip_vs_conn * ah_esp_conn_out_get(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) @@ -97,7 +96,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " "%s%s %s->%s\n", inverse ? "ICMP+" : "", - pp->name, + ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); } @@ -107,7 +106,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, static int -ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { /* @@ -137,7 +136,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = { .app_conn_bind = NULL, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, /* ISAKMP */ - .set_state_timeout = NULL, }; #endif diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index f826dd1e4630..19bc37976ea7 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -9,7 +9,7 @@ #include static int -sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { struct net *net; @@ -47,10 +47,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -907,14 +907,13 @@ static const char *sctp_state_name(int state) } static inline int -set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, +set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int direction, const struct sk_buff *skb) { sctp_chunkhdr_t _sctpch, *sch; unsigned char chunk_type; int event, next_state; int ihl; - struct ip_vs_proto_data *pd; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -966,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, IP_VS_DBG_BUF(8, "%s %s %s:%d->" "%s:%d state: %s->%s conn->refcnt:%d\n", - pp->name, + pd->pp->name, ((direction == IP_VS_DIR_OUTPUT) ? "output " : "input "), IP_VS_DBG_ADDR(cp->af, &cp->daddr), @@ -990,7 +989,6 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } } - pd = ip_vs_proto_data_get(&init_net, pp->protocol); /* tmp fix */ if (likely(pd)) cp->timeout = pd->timeout_table[cp->state = next_state]; else /* What to do ? */ @@ -1001,12 +999,12 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, static int sctp_state_transition(struct ip_vs_conn *cp, int direction, - const struct sk_buff *skb, struct ip_vs_protocol *pp) + const struct sk_buff *skb, struct ip_vs_proto_data *pd) { int ret = 0; spin_lock(&cp->lock); - ret = set_sctp_state(pp, cp, direction, skb); + ret = set_sctp_state(pd, cp, direction, skb); spin_unlock(&cp->lock); return ret; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 9d9df3d61093..d7c245532798 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -32,7 +32,7 @@ #include static int -tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { struct net *net; @@ -68,10 +68,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -448,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = { /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; -static struct tcp_states_t *tcp_state_table = tcp_states; - - -static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) +static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags) { int on = (flags & 1); /* secure_tcp */ @@ -461,7 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) ** for most if not for all of the applications. Something ** like "capabilities" (flags) for each object. */ - tcp_state_table = (on? tcp_states_dos : tcp_states); + pd->tcp_state_table = (on ? tcp_states_dos : tcp_states); } static inline int tcp_state_idx(struct tcphdr *th) @@ -478,13 +475,12 @@ static inline int tcp_state_idx(struct tcphdr *th) } static inline void -set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, +set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int direction, struct tcphdr *th) { int state_idx; int new_state = IP_VS_TCP_S_CLOSE; int state_off = tcp_state_off[direction]; - struct ip_vs_proto_data *pd; /* Temp fix */ /* * Update state offset to INPUT_ONLY if necessary @@ -502,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, goto tcp_state_out; } - new_state = tcp_state_table[state_off+state_idx].next_state[cp->state]; + new_state = + pd->tcp_state_table[state_off+state_idx].next_state[cp->state]; tcp_state_out: if (new_state != cp->state) { @@ -510,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" "%s:%d state: %s->%s conn->refcnt:%d\n", - pp->name, + pd->pp->name, ((state_off == TCP_DIR_OUTPUT) ? "output " : "input "), th->syn ? 'S' : '.', @@ -540,7 +537,6 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } - pd = ip_vs_proto_data_get(&init_net, pp->protocol); if (likely(pd)) cp->timeout = pd->timeout_table[cp->state = new_state]; else /* What to do ? */ @@ -553,7 +549,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, static int tcp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { struct tcphdr _tcph, *th; @@ -568,7 +564,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, return 0; spin_lock(&cp->lock); - set_tcp_state(pp, cp, direction, th); + set_tcp_state(pd, cp, direction, th); spin_unlock(&cp->lock); return 1; @@ -691,6 +687,7 @@ static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); + pd->tcp_state_table = tcp_states; } static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 71a4721a8f8a..aa85df2f14a0 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -29,7 +29,7 @@ #include static int -udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { struct net *net; @@ -64,10 +64,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -457,11 +457,8 @@ static const char * udp_state_name(int state) static int udp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { - struct ip_vs_proto_data *pd; /* Temp fix, pp will be replaced by pd */ - - pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); if (unlikely(!pd)) { pr_err("UDP no ns data\n"); return 0; diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 9127a3d8aa35..bb10b0717f1b 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); + cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); if (unlikely(cp == NULL)) { match = false; goto out; -- cgit v1.2.3-59-g8ed1b From 9bbac6a904d0816dae58b454692c54d6773cc20d Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:52 +0100 Subject: IPVS: netns, common protocol changes and use of appcnt. appcnt and timeout_table moved from struct ip_vs_protocol to ip_vs proto_data. struct net *net added as first param to - register_app() - unregister_app() - app_conn_bind() - ip_vs_conn_new() [horms@verge.net.au: removed cosmetic-change-only hunk] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 -- net/netfilter/ipvs/ip_vs_conn.c | 6 ++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 4 +-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 5 ++-- net/netfilter/ipvs/ip_vs_proto_udp.c | 4 +-- net/netfilter/ipvs/ip_vs_sync.c | 55 +++++++++++++++++++---------------- 6 files changed, 39 insertions(+), 37 deletions(-) (limited to 'net/netfilter/ipvs/ip_vs_proto_tcp.c') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 464ea365ca07..cc6ae621a9b5 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -360,8 +360,6 @@ struct ip_vs_protocol { u16 protocol; u16 num_states; int dont_defrag; - atomic_t appcnt; /* counter of proto app incs */ - int *timeout_table; /* protocol timeout table */ void (*init)(struct ip_vs_protocol *pp); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a7aba6a4697e..b2024c942345 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -804,7 +804,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; - struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, p->protocol); cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { @@ -863,8 +863,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, #endif ip_vs_bind_xmit(cp); - if (unlikely(pp && atomic_read(&pp->appcnt))) - ip_vs_bind_app(cp, pp); + if (unlikely(pd && atomic_read(&pd->appcnt))) + ip_vs_bind_app(cp, pd->pp); /* * Allow conntrack to be preserved. By default, conntrack diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 19bc37976ea7..0f14f793318a 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1035,7 +1035,7 @@ static int sctp_register_app(struct ip_vs_app *inc) } } list_add(&inc->p_list, &ipvs->sctp_apps[hash]); - atomic_inc(&pd->pp->appcnt); + atomic_inc(&pd->appcnt); out: spin_unlock_bh(&ipvs->sctp_app_lock); @@ -1048,7 +1048,7 @@ static void sctp_unregister_app(struct ip_vs_app *inc) struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); spin_lock_bh(&ipvs->sctp_app_lock); - atomic_dec(&pd->pp->appcnt); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); spin_unlock_bh(&ipvs->sctp_app_lock); } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index d7c245532798..290b3803d8ce 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -596,7 +596,7 @@ static int tcp_register_app(struct ip_vs_app *inc) } } list_add(&inc->p_list, &ipvs->tcp_apps[hash]); - atomic_inc(&pd->pp->appcnt); + atomic_inc(&pd->appcnt); out: spin_unlock_bh(&ipvs->tcp_app_lock); @@ -611,7 +611,7 @@ tcp_unregister_app(struct ip_vs_app *inc) struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); spin_lock_bh(&ipvs->tcp_app_lock); - atomic_dec(&pd->pp->appcnt); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); spin_unlock_bh(&ipvs->tcp_app_lock); } @@ -701,7 +701,6 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .protocol = IPPROTO_TCP, .num_states = IP_VS_TCP_S_LAST, .dont_defrag = 0, - .appcnt = ATOMIC_INIT(0), .init = NULL, .exit = NULL, .init_netns = __ip_vs_tcp_init, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index aa85df2f14a0..3719837a8fdc 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -373,7 +373,7 @@ static int udp_register_app(struct ip_vs_app *inc) } } list_add(&inc->p_list, &ipvs->udp_apps[hash]); - atomic_inc(&pd->pp->appcnt); + atomic_inc(&pd->appcnt); out: spin_unlock_bh(&ipvs->udp_app_lock); @@ -388,7 +388,7 @@ udp_unregister_app(struct ip_vs_app *inc) struct netns_ipvs *ipvs = net_ipvs(&init_net); spin_lock_bh(&ipvs->udp_app_lock); - atomic_dec(&pd->pp->appcnt); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); spin_unlock_bh(&ipvs->udp_app_lock); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 662aa2c22a05..6831e8fac8db 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -725,17 +725,16 @@ ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, * Param: ... * timeout is in sec. */ -static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, - unsigned state, unsigned protocol, unsigned type, +static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, + unsigned int flags, unsigned int state, + unsigned int protocol, unsigned int type, const union nf_inet_addr *daddr, __be16 dport, unsigned long timeout, __u32 fwmark, - struct ip_vs_sync_conn_options *opt, - struct ip_vs_protocol *pp) + struct ip_vs_sync_conn_options *opt) { struct ip_vs_dest *dest; struct ip_vs_conn *cp; - if (!(flags & IP_VS_CONN_F_TEMPLATE)) cp = ip_vs_conn_in_get(param); else @@ -821,17 +820,23 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) timeout = MAX_SCHEDULE_TIMEOUT / HZ; cp->timeout = timeout*HZ; - } else if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) - cp->timeout = pp->timeout_table[state]; - else - cp->timeout = (3*60*HZ); + } else { + struct ip_vs_proto_data *pd; + + pd = ip_vs_proto_data_get(net, protocol); + if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) + cp->timeout = pd->timeout_table[state]; + else + cp->timeout = (3*60*HZ); + } ip_vs_conn_put(cp); } /* * Process received multicast message for Version 0 */ -static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) +static void ip_vs_process_message_v0(struct net *net, const char *buffer, + const size_t buflen) { struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; struct ip_vs_sync_conn_v0 *s; @@ -879,7 +884,6 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) } } else { /* protocol in templates is not used for state/timeout */ - pp = NULL; if (state > 0) { IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", state); @@ -894,9 +898,9 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) s->vport, ¶m); /* Send timeout as Zero */ - ip_vs_proc_conn(¶m, flags, state, s->protocol, AF_INET, + ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET, (union nf_inet_addr *)&s->daddr, s->dport, - 0, 0, opt, pp); + 0, 0, opt); } } @@ -945,7 +949,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, /* * Process a Version 1 sync. connection */ -static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) +static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) { struct ip_vs_sync_conn_options opt; union ip_vs_sync_conn *s; @@ -1043,7 +1047,6 @@ static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) } } else { /* protocol in templates is not used for state/timeout */ - pp = NULL; if (state > 0) { IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", state); @@ -1058,18 +1061,18 @@ static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) } /* If only IPv4, just silent skip IPv6 */ if (af == AF_INET) - ip_vs_proc_conn(¶m, flags, state, s->v4.protocol, af, + ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af, (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, ntohl(s->v4.timeout), ntohl(s->v4.fwmark), - (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL), - pp); + (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) + ); #ifdef CONFIG_IP_VS_IPV6 else - ip_vs_proc_conn(¶m, flags, state, s->v6.protocol, af, + ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af, (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, ntohl(s->v6.timeout), ntohl(s->v6.fwmark), - (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL), - pp); + (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) + ); #endif return 0; /* Error exit */ @@ -1083,7 +1086,8 @@ out: * ip_vs_conn entries. * Handles Version 0 & 1 */ -static void ip_vs_process_message(__u8 *buffer, const size_t buflen) +static void ip_vs_process_message(struct net *net, __u8 *buffer, + const size_t buflen) { struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; @@ -1136,7 +1140,8 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) return; } /* Process a single sync_conn */ - if ((retc=ip_vs_proc_sync_conn(p, msg_end)) < 0) { + retc = ip_vs_proc_sync_conn(net, p, msg_end); + if (retc < 0) { IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", retc); return; @@ -1146,7 +1151,7 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) } } else { /* Old type of message */ - ip_vs_process_message_v0(buffer, buflen); + ip_vs_process_message_v0(net, buffer, buflen); return; } } @@ -1500,7 +1505,7 @@ static int sync_thread_backup(void *data) /* disable bottom half, because it accesses the data shared by softirq while getting/creating conns */ local_bh_disable(); - ip_vs_process_message(tinfo->buf, len); + ip_vs_process_message(&init_net, tinfo->buf, len); local_bh_enable(); } } -- cgit v1.2.3-59-g8ed1b From ab8a5e8408c3df2d654611bffc3aaf04f418b266 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:53 +0100 Subject: IPVS: netns awareness to ip_vs_app All variables moved to struct ipvs, most external changes fixed (i.e. init_net removed) in ip_vs_protocol param struct net *net added to: - register_app() - unregister_app() This affected almost all proto_xxx.c files Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 +++--- include/net/netns/ip_vs.h | 5 +++ net/netfilter/ipvs/ip_vs_app.c | 73 +++++++++++++++++++++-------------- net/netfilter/ipvs/ip_vs_ftp.c | 8 ++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 12 +++--- net/netfilter/ipvs/ip_vs_proto_tcp.c | 12 +++--- net/netfilter/ipvs/ip_vs_proto_udp.c | 12 +++--- 7 files changed, 76 insertions(+), 58 deletions(-) (limited to 'net/netfilter/ipvs/ip_vs_proto_tcp.c') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index cc6ae621a9b5..0cdd8ce454c2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -402,9 +402,9 @@ struct ip_vs_protocol { const struct sk_buff *skb, struct ip_vs_proto_data *pd); - int (*register_app)(struct ip_vs_app *inc); + int (*register_app)(struct net *net, struct ip_vs_app *inc); - void (*unregister_app)(struct ip_vs_app *inc); + void (*unregister_app)(struct net *net, struct ip_vs_app *inc); int (*app_conn_bind)(struct ip_vs_conn *cp); @@ -871,12 +871,12 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) * (from ip_vs_app.c) */ #define IP_VS_APP_MAX_PORTS 8 -extern int register_ip_vs_app(struct ip_vs_app *app); -extern void unregister_ip_vs_app(struct ip_vs_app *app); +extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app); +extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app); extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp); extern void ip_vs_unbind_app(struct ip_vs_conn *cp); -extern int -register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port); +extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, + __u16 proto, __u16 port); extern int ip_vs_app_inc_get(struct ip_vs_app *inc); extern void ip_vs_app_inc_put(struct ip_vs_app *inc); diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 58bd3fd85a97..03f7fe1bede6 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -28,6 +28,11 @@ struct netns_ipvs { #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) struct list_head rs_table[IP_VS_RTAB_SIZE]; + /* ip_vs_app */ + struct list_head app_list; + struct mutex app_mutex; + struct lock_class_key app_key; /* mutex debuging */ + /* ip_vs_proto */ #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 40b09ccc4896..286f46594e0e 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app); EXPORT_SYMBOL(unregister_ip_vs_app); EXPORT_SYMBOL(register_ip_vs_app_inc); -/* ipvs application list head */ -static LIST_HEAD(ip_vs_app_list); -static DEFINE_MUTEX(__ip_vs_app_mutex); - - /* * Get an ip_vs_app object */ @@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) * Allocate/initialize app incarnation and register it in proto apps. */ static int -ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) +ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, + __u16 port) { struct ip_vs_protocol *pp; struct ip_vs_app *inc; @@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) } } - ret = pp->register_app(inc); + ret = pp->register_app(net, inc); if (ret) goto out; @@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) * Release app incarnation */ static void -ip_vs_app_inc_release(struct ip_vs_app *inc) +ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) { struct ip_vs_protocol *pp; @@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc) return; if (pp->unregister_app) - pp->unregister_app(inc); + pp->unregister_app(net, inc); IP_VS_DBG(9, "%s App %s:%u unregistered\n", pp->name, inc->name, ntohs(inc->port)); @@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc) * Register an application incarnation in protocol applications */ int -register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) +register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, + __u16 port) { + struct netns_ipvs *ipvs = net_ipvs(net); int result; - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); - result = ip_vs_app_inc_new(app, proto, port); + result = ip_vs_app_inc_new(net, app, proto, port); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); return result; } @@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) /* * ip_vs_app registration routine */ -int register_ip_vs_app(struct ip_vs_app *app) +int register_ip_vs_app(struct net *net, struct ip_vs_app *app) { + struct netns_ipvs *ipvs = net_ipvs(net); /* increase the module use count */ ip_vs_use_count_inc(); - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); - list_add(&app->a_list, &ip_vs_app_list); + list_add(&app->a_list, &ipvs->app_list); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); return 0; } @@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app) * ip_vs_app unregistration routine * We are sure there are no app incarnations attached to services */ -void unregister_ip_vs_app(struct ip_vs_app *app) +void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *inc, *nxt; - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { - ip_vs_app_inc_release(inc); + ip_vs_app_inc_release(net, inc); } list_del(&app->a_list); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); @@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app) /* * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) */ -int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) +int ip_vs_bind_app(struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) { return pp->app_conn_bind(cp); } @@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) * /proc/net/ip_vs_app entry function */ -static struct ip_vs_app *ip_vs_app_idx(loff_t pos) +static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) { struct ip_vs_app *app, *inc; - list_for_each_entry(app, &ip_vs_app_list, a_list) { + list_for_each_entry(app, &ipvs->app_list, a_list) { list_for_each_entry(inc, &app->incs_list, a_list) { if (pos-- == 0) return inc; @@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos) static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) { - mutex_lock(&__ip_vs_app_mutex); + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); + + mutex_lock(&ipvs->app_mutex); - return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN; + return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_app *inc, *app; struct list_head *e; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); ++*pos; if (v == SEQ_START_TOKEN) - return ip_vs_app_idx(0); + return ip_vs_app_idx(ipvs, 0); inc = v; app = inc->app; @@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) return list_entry(e, struct ip_vs_app, a_list); /* go on to next application */ - for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) { + for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { app = list_entry(e, struct ip_vs_app, a_list); list_for_each_entry(inc, &app->incs_list, a_list) { return inc; @@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) { - mutex_unlock(&__ip_vs_app_mutex); + struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq)); + + mutex_unlock(&ipvs->app_mutex); } static int ip_vs_app_seq_show(struct seq_file *seq, void *v) @@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = { static int ip_vs_app_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_app_seq_ops); + return seq_open_net(inode, file, &ip_vs_app_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ip_vs_app_fops = { @@ -571,9 +580,13 @@ static const struct file_operations ip_vs_app_fops = { static int __net_init __ip_vs_app_init(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + INIT_LIST_HEAD(&ipvs->app_list); + __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key); proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index b38ae941f677..77b0036dcb73 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -414,14 +414,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; - ret = register_ip_vs_app(app); + ret = register_ip_vs_app(net, app); if (ret) return ret; for (i=0; iprotocol, ports[i]); + ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]); if (ret) break; pr_info("%s: loaded support on port[%d] = %d\n", @@ -429,7 +429,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) } if (ret) - unregister_ip_vs_app(app); + unregister_ip_vs_app(net, app); return ret; } @@ -443,7 +443,7 @@ static void __ip_vs_ftp_exit(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return; - unregister_ip_vs_app(app); + unregister_ip_vs_app(net, app); } static struct pernet_operations ip_vs_ftp_ops = { diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 0f14f793318a..569e77bf08c4 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1016,14 +1016,14 @@ static inline __u16 sctp_app_hashkey(__be16 port) & SCTP_APP_TAB_MASK; } -static int sctp_register_app(struct ip_vs_app *inc) +static int sctp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); hash = sctp_app_hashkey(port); @@ -1042,10 +1042,10 @@ out: return ret; } -static void sctp_unregister_app(struct ip_vs_app *inc) +static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); spin_lock_bh(&ipvs->sctp_app_lock); atomic_dec(&pd->appcnt); diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 290b3803d8ce..757aaaf083bb 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -577,14 +577,14 @@ static inline __u16 tcp_app_hashkey(__be16 port) } -static int tcp_register_app(struct ip_vs_app *inc) +static int tcp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); hash = tcp_app_hashkey(port); @@ -605,10 +605,10 @@ static int tcp_register_app(struct ip_vs_app *inc) static void -tcp_unregister_app(struct ip_vs_app *inc) +tcp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); spin_lock_bh(&ipvs->tcp_app_lock); atomic_dec(&pd->appcnt); diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 3719837a8fdc..1dc394100fa8 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -353,14 +353,14 @@ static inline __u16 udp_app_hashkey(__be16 port) } -static int udp_register_app(struct ip_vs_app *inc) +static int udp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); hash = udp_app_hashkey(port); @@ -382,10 +382,10 @@ static int udp_register_app(struct ip_vs_app *inc) static void -udp_unregister_app(struct ip_vs_app *inc) +udp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + struct netns_ipvs *ipvs = net_ipvs(net); spin_lock_bh(&ipvs->udp_app_lock); atomic_dec(&pd->appcnt); -- cgit v1.2.3-59-g8ed1b From 6e67e586e7289c144d5a189d6e0fa7141d025746 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:57 +0100 Subject: IPVS: netns, connection hash got net as param. Connection hash table is now name space aware. i.e. net ptr >> 8 is xor:ed to the hash, and this is the first param to be compared. The net struct is 0xa40 in size ( a little bit smaller for 32 bit arch:s) and cache-line aligned, so a ptr >> 5 might be a more clever solution ? All lookups where net is compared uses net_eq() which returns 1 when netns is disabled, and the compiler seems to do something clever in that case. ip_vs_conn_fill_param() have *net as first param now. Three new inlines added to keep conn struct smaller when names space is disabled. - ip_vs_conn_net() - ip_vs_conn_net_set() - ip_vs_conn_net_eq() *v3 moved net compare to the end in "fast path" Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 53 +++++++++++---- include/net/netns/ip_vs.h | 2 + net/netfilter/ipvs/ip_vs_conn.c | 112 ++++++++++++++++++++------------ net/netfilter/ipvs/ip_vs_core.c | 15 +++-- net/netfilter/ipvs/ip_vs_ftp.c | 14 ++-- net/netfilter/ipvs/ip_vs_nfct.c | 6 +- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 15 +++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 13 ++-- 11 files changed, 153 insertions(+), 83 deletions(-) (limited to 'net/netfilter/ipvs/ip_vs_proto_tcp.c') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 605d5db81a39..f82c0ffdee74 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -477,6 +477,7 @@ extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net, unsigned short proto); struct ip_vs_conn_param { + struct net *net; const union nf_inet_addr *caddr; const union nf_inet_addr *vaddr; __be16 cport; @@ -494,17 +495,19 @@ struct ip_vs_conn_param { */ struct ip_vs_conn { struct list_head c_list; /* hashed list heads */ - +#ifdef CONFIG_NET_NS + struct net *net; /* Name space */ +#endif /* Protocol, addresses and port numbers */ - u16 af; /* address family */ - union nf_inet_addr caddr; /* client address */ - union nf_inet_addr vaddr; /* virtual address */ - union nf_inet_addr daddr; /* destination address */ - volatile __u32 flags; /* status flags */ - __u32 fwmark; /* Fire wall mark from skb */ - __be16 cport; - __be16 vport; - __be16 dport; + u16 af; /* address family */ + __be16 cport; + __be16 vport; + __be16 dport; + __u32 fwmark; /* Fire wall mark from skb */ + union nf_inet_addr caddr; /* client address */ + union nf_inet_addr vaddr; /* virtual address */ + union nf_inet_addr daddr; /* destination address */ + volatile __u32 flags; /* status flags */ __u16 protocol; /* Which protocol (TCP/UDP) */ /* counter and timer */ @@ -547,6 +550,33 @@ struct ip_vs_conn { __u8 pe_data_len; }; +/* + * To save some memory in conn table when name space is disabled. + */ +static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp) +{ +#ifdef CONFIG_NET_NS + return cp->net; +#else + return &init_net; +#endif +} +static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net) +{ +#ifdef CONFIG_NET_NS + cp->net = net; +#endif +} + +static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp, + struct net *net) +{ +#ifdef CONFIG_NET_NS + return cp->net == net; +#else + return 1; +#endif +} /* * Extended internal versions of struct ip_vs_service_user and @@ -796,13 +826,14 @@ enum { IP_VS_DIR_LAST, }; -static inline void ip_vs_conn_fill_param(int af, int protocol, +static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol, const union nf_inet_addr *caddr, __be16 cport, const union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p) { + p->net = net; p->af = af; p->protocol = protocol; p->caddr = caddr; diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index bd1dad872178..1acfb334e69b 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -66,6 +66,8 @@ struct netns_ipvs { struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ seqcount_t *ustats_seq; /* u64 read retry */ + /* ip_vs_conn */ + atomic_t conn_count; /* connection counter */ /* ip_vs_lblc */ int sysctl_lblc_expiration; struct ctl_table_header *lblc_ctl_header; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index b2024c942345..0d5e4feabc1b 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -64,9 +64,6 @@ static struct list_head *ip_vs_conn_tab __read_mostly; /* SLAB cache for IPVS connections */ static struct kmem_cache *ip_vs_conn_cachep __read_mostly; -/* counter for current IPVS connections */ -static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); - /* counter for no client port connections */ static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); @@ -76,7 +73,7 @@ static unsigned int ip_vs_conn_rnd __read_mostly; /* * Fine locking granularity for big connection hash table */ -#define CT_LOCKARRAY_BITS 4 +#define CT_LOCKARRAY_BITS 5 #define CT_LOCKARRAY_SIZE (1<>8)) & ip_vs_conn_tab_mask; #endif - return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, - ip_vs_conn_rnd) - & ip_vs_conn_tab_mask; + return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, + ip_vs_conn_rnd) ^ + ((size_t)net>>8)) & ip_vs_conn_tab_mask; } static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, @@ -166,15 +163,15 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, port = p->vport; } - return ip_vs_conn_hashkey(p->af, p->protocol, addr, port); + return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port); } static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, - NULL, 0, &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol, + &cp->caddr, cp->cport, NULL, 0, &p); if (cp->pe) { p.pe = cp->pe; @@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) } /* - * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. + * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port. * returns bool success. */ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) @@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && + p->cport == cp->cport && p->vport == cp->vport && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && - p->cport == cp->cport && p->vport == cp->vport && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && - p->protocol == cp->protocol) { + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { /* HIT */ atomic_inc(&cp->refcnt); ct_read_unlock(hash); @@ -313,17 +311,18 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, struct ip_vs_conn_param *p) { __be16 _ports[2], *pptr; + struct net *net = skb_net(skb); pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) return 1; if (likely(!inverse)) - ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], - &iph->daddr, pptr[1], p); + ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, + pptr[0], &iph->daddr, pptr[1], p); else - ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], - &iph->saddr, pptr[0], p); + ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr, + pptr[1], &iph->saddr, pptr[0], p); return 0; } @@ -352,6 +351,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + if (!ip_vs_conn_net_eq(cp, p->net)) + continue; if (p->pe_data && p->pe->ct_match) { if (p->pe == cp->pe && p->pe->ct_match(p, cp)) goto out; @@ -403,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && + p->vport == cp->cport && p->cport == cp->dport && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && - p->vport == cp->cport && p->cport == cp->dport && - p->protocol == cp->protocol) { + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { /* HIT */ atomic_inc(&cp->refcnt); ret = cp; @@ -609,8 +611,8 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(&init_net, cp->af, &cp->daddr, cp->dport, - &cp->vaddr, cp->vport, + dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, + cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark); ip_vs_bind_dest(cp, dest); return dest; @@ -728,6 +730,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); cp->timeout = 60*HZ; @@ -770,7 +773,7 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_unbind_dest(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); - atomic_dec(&ip_vs_conn_count); + atomic_dec(&ipvs->conn_count); kmem_cache_free(ip_vs_conn_cachep, cp); return; @@ -804,7 +807,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, p->protocol); + struct netns_ipvs *ipvs = net_ipvs(p->net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, + p->protocol); cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { @@ -814,6 +819,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, INIT_LIST_HEAD(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); + ip_vs_conn_net_set(cp, p->net); cp->af = p->af; cp->protocol = p->protocol; ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); @@ -844,7 +850,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, atomic_set(&cp->n_control, 0); atomic_set(&cp->in_pkts, 0); - atomic_inc(&ip_vs_conn_count); + atomic_inc(&ipvs->conn_count); if (flags & IP_VS_CONN_F_NO_CPORT) atomic_inc(&ip_vs_conn_no_cport_cnt); @@ -886,17 +892,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, * /proc/net/ip_vs_conn entries */ #ifdef CONFIG_PROC_FS +struct ip_vs_iter_state { + struct seq_net_private p; + struct list_head *l; +}; static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) { int idx; struct ip_vs_conn *cp; + struct ip_vs_iter_state *iter = seq->private; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { - seq->private = &ip_vs_conn_tab[idx]; + iter->l = &ip_vs_conn_tab[idx]; return cp; } } @@ -908,14 +919,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) { - seq->private = NULL; + struct ip_vs_iter_state *iter = seq->private; + + iter->l = NULL; return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; } static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; - struct list_head *e, *l = seq->private; + struct ip_vs_iter_state *iter = seq->private; + struct list_head *e, *l = iter->l; int idx; ++*pos; @@ -932,18 +946,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) while (++idx < ip_vs_conn_tab_size) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { - seq->private = &ip_vs_conn_tab[idx]; + iter->l = &ip_vs_conn_tab[idx]; return cp; } ct_read_unlock_bh(idx); } - seq->private = NULL; + iter->l = NULL; return NULL; } static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) { - struct list_head *l = seq->private; + struct ip_vs_iter_state *iter = seq->private; + struct list_head *l = iter->l; if (l) ct_read_unlock_bh(l - ip_vs_conn_tab); @@ -957,9 +972,12 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); else { const struct ip_vs_conn *cp = v; + struct net *net = seq_file_net(seq); char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; size_t len = 0; + if (!ip_vs_conn_net_eq(cp, net)) + return 0; if (cp->pe_data) { pe_data[0] = ' '; len = strlen(cp->pe->name); @@ -1004,7 +1022,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = { static int ip_vs_conn_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_conn_seq_ops); + return seq_open_net(inode, file, &ip_vs_conn_seq_ops, + sizeof(struct ip_vs_iter_state)); } static const struct file_operations ip_vs_conn_fops = { @@ -1031,6 +1050,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); else { const struct ip_vs_conn *cp = v; + struct net *net = seq_file_net(seq); + + if (!ip_vs_conn_net_eq(cp, net)) + return 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) @@ -1067,7 +1090,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = { static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_conn_sync_seq_ops); + return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops, + sizeof(struct ip_vs_iter_state)); } static const struct file_operations ip_vs_conn_sync_fops = { @@ -1168,10 +1192,11 @@ void ip_vs_random_dropentry(void) /* * Flush all the connection entries in the ip_vs_conn_tab */ -static void ip_vs_conn_flush(void) +static void ip_vs_conn_flush(struct net *net) { int idx; struct ip_vs_conn *cp; + struct netns_ipvs *ipvs = net_ipvs(net); flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { @@ -1181,7 +1206,8 @@ static void ip_vs_conn_flush(void) ct_write_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { - + if (!ip_vs_conn_net_eq(cp, net)) + continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); if (cp->control) { @@ -1194,7 +1220,7 @@ static void ip_vs_conn_flush(void) /* the counter may be not NULL, because maybe some conn entries are run by slow timer handler or unhashed but still referred */ - if (atomic_read(&ip_vs_conn_count) != 0) { + if (atomic_read(&ipvs->conn_count) != 0) { schedule(); goto flush_again; } @@ -1204,8 +1230,11 @@ static void ip_vs_conn_flush(void) */ int __net_init __ip_vs_conn_init(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + atomic_set(&ipvs->conn_count, 0); proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); @@ -1217,6 +1246,8 @@ static void __net_exit __ip_vs_conn_cleanup(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return; + /* flush all the connection entries first */ + ip_vs_conn_flush(net); proc_net_remove(net, "ip_vs_conn"); proc_net_remove(net, "ip_vs_conn_sync"); } @@ -1277,9 +1308,6 @@ int __init ip_vs_conn_init(void) void ip_vs_conn_cleanup(void) { unregister_pernet_subsys(&ipvs_conn_ops); - /* flush all the connection entries first */ - ip_vs_conn_flush(); - /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 7e6a2a046bf5..7205b49c56c1 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -205,7 +205,8 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, const union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p) { - ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); + ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, + vport, p); p->pe = svc->pe; if (p->pe && p->pe->fill_param) return p->pe->fill_param(p, skb); @@ -348,8 +349,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, src_port, - &iph.daddr, dst_port, ¶m); + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, + src_port, &iph.daddr, dst_port, ¶m); cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { @@ -464,8 +465,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, */ { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, - pptr[0], &iph.daddr, pptr[1], &p); + + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, + &iph.saddr, pptr[0], &iph.daddr, pptr[1], + &p); cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? dest->port : pptr[1], flags, dest, skb->mark); @@ -532,7 +535,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->af, iph.protocol, + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, pptr[0], &iph.daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, &daddr, 0, diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 77b0036dcb73..6a04f9ab9d0d 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -198,13 +198,15 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, */ { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, iph->protocol, - &from, port, &cp->caddr, 0, &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + iph->protocol, &from, port, + &cp->caddr, 0, &p); n_cp = ip_vs_conn_out_get(&p); } if (!n_cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr, + ip_vs_conn_fill_param(ip_vs_conn_net(cp), + AF_INET, IPPROTO_TCP, &cp->caddr, 0, &cp->vaddr, port, &p); n_cp = ip_vs_conn_new(&p, &from, port, IP_VS_CONN_F_NO_CPORT | @@ -361,9 +363,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port, - &cp->vaddr, htons(ntohs(cp->vport)-1), - &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + iph->protocol, &to, port, &cp->vaddr, + htons(ntohs(cp->vport)-1), &p); n_cp = ip_vs_conn_in_get(&p); if (!n_cp) { n_cp = ip_vs_conn_new(&p, &cp->daddr, diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 4680647cd450..f454c80df0a7 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, struct nf_conntrack_tuple *orig, new_reply; struct ip_vs_conn *cp; struct ip_vs_conn_param p; + struct net *net = nf_ct_net(ct); if (exp->tuple.src.l3num != PF_INET) return; @@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, /* RS->CLIENT */ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum, + ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, &orig->src.u3, orig->src.u.tcp.port, &orig->dst.u3, orig->dst.u.tcp.port, &p); cp = ip_vs_conn_out_get(&p); @@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) " for conn " FMT_CONN "\n", __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); - h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); + h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, + &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); /* Show what happens instead of calling nf_ct_kill() */ diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 28039cbfcff4..5b8eb8b12c3e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -41,15 +41,16 @@ struct isakmp_hdr { #define PORT_ISAKMP 500 static void -ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, - int inverse, struct ip_vs_conn_param *p) +ah_esp_conn_fill_param_proto(struct net *net, int af, + const struct ip_vs_iphdr *iph, int inverse, + struct ip_vs_conn_param *p) { if (likely(!inverse)) - ip_vs_conn_fill_param(af, IPPROTO_UDP, + ip_vs_conn_fill_param(net, af, IPPROTO_UDP, &iph->saddr, htons(PORT_ISAKMP), &iph->daddr, htons(PORT_ISAKMP), p); else - ip_vs_conn_fill_param(af, IPPROTO_UDP, + ip_vs_conn_fill_param(net, af, IPPROTO_UDP, &iph->daddr, htons(PORT_ISAKMP), &iph->saddr, htons(PORT_ISAKMP), p); } @@ -61,8 +62,9 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, { struct ip_vs_conn *cp; struct ip_vs_conn_param p; + struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); cp = ip_vs_conn_in_get(&p); if (!cp) { /* @@ -89,8 +91,9 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, { struct ip_vs_conn *cp; struct ip_vs_conn_param p; + struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); cp = ip_vs_conn_out_get(&p); if (!cp) { IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 569e77bf08c4..550365a690c7 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1055,7 +1055,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) static int sctp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 757aaaf083bb..d8b3f9f15826 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -620,7 +620,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc) static int tcp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 1dc394100fa8..581157bbded5 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -396,7 +396,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc) static int udp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index c29e73d686fb..f85e47daecc3 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -660,21 +660,21 @@ control: * fill_param used by version 1 */ static inline int -ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, +ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, struct ip_vs_conn_param *p, __u8 *pe_data, unsigned int pe_data_len, __u8 *pe_name, unsigned int pe_name_len) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - ip_vs_conn_fill_param(af, sc->v6.protocol, + ip_vs_conn_fill_param(net, af, sc->v6.protocol, (const union nf_inet_addr *)&sc->v6.caddr, sc->v6.cport, (const union nf_inet_addr *)&sc->v6.vaddr, sc->v6.vport, p); else #endif - ip_vs_conn_fill_param(af, sc->v4.protocol, + ip_vs_conn_fill_param(net, af, sc->v4.protocol, (const union nf_inet_addr *)&sc->v4.caddr, sc->v4.cport, (const union nf_inet_addr *)&sc->v4.vaddr, @@ -881,7 +881,7 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer, } } - ip_vs_conn_fill_param(AF_INET, s->protocol, + ip_vs_conn_fill_param(net, AF_INET, s->protocol, (const union nf_inet_addr *)&s->caddr, s->cport, (const union nf_inet_addr *)&s->vaddr, @@ -1043,9 +1043,8 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) state = 0; } } - if (ip_vs_conn_fill_param_sync(af, s, ¶m, - pe_data, pe_data_len, - pe_name, pe_name_len)) { + if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data, + pe_data_len, pe_name, pe_name_len)) { retc = 50; goto out; } -- cgit v1.2.3-59-g8ed1b From a0840e2e165a370ca24a59545e564e9881a55891 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:58 +0100 Subject: IPVS: netns, ip_vs_ctl local vars moved to ipvs struct. Moving global vars to ipvs struct, except for svc table lock. Next patch for ctl will be drop-rate handling. *v3 __ip_vs_mutex remains global ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 27 ++-- include/net/netns/ip_vs.h | 37 ++++- net/netfilter/ipvs/ip_vs_conn.c | 7 +- net/netfilter/ipvs/ip_vs_core.c | 34 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 291 ++++++++++++++++++---------------- net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 9 +- 9 files changed, 230 insertions(+), 181 deletions(-) (limited to 'net/netfilter/ipvs/ip_vs_proto_tcp.c') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f82c0ffdee74..af9acf44e40a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -41,7 +41,7 @@ static inline struct netns_ipvs *net_ipvs(struct net* net) * Get net ptr from skb in traffic cases * use skb_sknet when call is from userland (ioctl or netlink) */ -static inline struct net *skb_net(struct sk_buff *skb) +static inline struct net *skb_net(const struct sk_buff *skb) { #ifdef CONFIG_NET_NS #ifdef CONFIG_IP_VS_DEBUG @@ -69,7 +69,7 @@ static inline struct net *skb_net(struct sk_buff *skb) #endif } -static inline struct net *skb_sknet(struct sk_buff *skb) +static inline struct net *skb_sknet(const struct sk_buff *skb) { #ifdef CONFIG_NET_NS #ifdef CONFIG_IP_VS_DEBUG @@ -1023,13 +1023,6 @@ extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, /* * IPVS control data and functions (from ip_vs_ctl.c) */ -extern int sysctl_ip_vs_cache_bypass; -extern int sysctl_ip_vs_expire_nodest_conn; -extern int sysctl_ip_vs_expire_quiescent_template; -extern int sysctl_ip_vs_sync_threshold[2]; -extern int sysctl_ip_vs_nat_icmp_send; -extern int sysctl_ip_vs_conntrack; -extern int sysctl_ip_vs_snat_reroute; extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; extern int sysctl_ip_vs_sync_ver; @@ -1119,11 +1112,13 @@ extern int ip_vs_icmp_xmit_v6 extern int ip_vs_drop_rate; extern int ip_vs_drop_counter; -static __inline__ int ip_vs_todrop(void) +static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { - if (!ip_vs_drop_rate) return 0; - if (--ip_vs_drop_counter > 0) return 0; - ip_vs_drop_counter = ip_vs_drop_rate; + if (!ipvs->drop_rate) + return 0; + if (--ipvs->drop_counter > 0) + return 0; + ipvs->drop_counter = ipvs->drop_rate; return 1; } @@ -1211,9 +1206,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb) * Netfilter connection tracking * (from ip_vs_nfct.c) */ -static inline int ip_vs_conntrack_enabled(void) +static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) { - return sysctl_ip_vs_conntrack; + return ipvs->sysctl_conntrack; } extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, @@ -1226,7 +1221,7 @@ extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp); #else -static inline int ip_vs_conntrack_enabled(void) +static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) { return 0; } diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 1acfb334e69b..c4b1abf258e4 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -61,13 +61,46 @@ struct netns_ipvs { struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; spinlock_t sctp_app_lock; #endif + /* ip_vs_conn */ + atomic_t conn_count; /* connection counter */ + /* ip_vs_ctl */ struct ip_vs_stats *tot_stats; /* Statistics & est. */ struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ seqcount_t *ustats_seq; /* u64 read retry */ - /* ip_vs_conn */ - atomic_t conn_count; /* connection counter */ + int num_services; /* no of virtual services */ + /* 1/rate drop and drop-entry variables */ + int drop_rate; + int drop_counter; + atomic_t dropentry; + /* locks in ctl.c */ + spinlock_t dropentry_lock; /* drop entry handling */ + spinlock_t droppacket_lock; /* drop packet handling */ + spinlock_t securetcp_lock; /* state and timeout tables */ + rwlock_t rs_lock; /* real services table */ + /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ + struct lock_class_key ctl_key; /* ctl_mutex debuging */ + /* sys-ctl struct */ + struct ctl_table_header *sysctl_hdr; + struct ctl_table *sysctl_tbl; + /* sysctl variables */ + int sysctl_amemthresh; + int sysctl_am_droprate; + int sysctl_drop_entry; + int sysctl_drop_packet; + int sysctl_secure_tcp; +#ifdef CONFIG_IP_VS_NFCT + int sysctl_conntrack; +#endif + int sysctl_snat_reroute; + int sysctl_sync_ver; + int sysctl_cache_bypass; + int sysctl_expire_nodest_conn; + int sysctl_expire_quiescent_template; + int sysctl_sync_threshold[2]; + int sysctl_nat_icmp_send; + /* ip_vs_lblc */ int sysctl_lblc_expiration; struct ctl_table_header *lblc_ctl_header; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 0d5e4feabc1b..5ba205a4d79c 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) int ip_vs_check_template(struct ip_vs_conn *ct) { struct ip_vs_dest *dest = ct->dest; + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct)); /* * Checking the dest server status. */ if ((dest == NULL) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || - (sysctl_ip_vs_expire_quiescent_template && + (ipvs->sysctl_expire_quiescent_template && (atomic_read(&dest->weight) == 0))) { IP_VS_DBG_BUF(9, "check_template: dest not available for " "protocol %s s:%s:%d v:%s:%d " @@ -879,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, * IP_VS_CONN_F_ONE_PACKET too. */ - if (ip_vs_conntrack_enabled()) + if (ip_vs_conntrack_enabled(ipvs)) cp->flags |= IP_VS_CONN_F_NFCT; /* Hash it in the ip_vs_conn_tab finally */ @@ -1198,7 +1199,7 @@ static void ip_vs_conn_flush(struct net *net) struct ip_vs_conn *cp; struct netns_ipvs *ipvs = net_ipvs(net); - flush_again: +flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { /* * Lock is actually needed in this loop. diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 7205b49c56c1..a7c59a722af3 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -499,6 +499,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd) { + struct netns_ipvs *ipvs; __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; int unicast; @@ -521,7 +522,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create a cache_bypass connection entry */ - if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { + ipvs = net_ipvs(skb_net(skb)); + if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && @@ -733,6 +735,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, unsigned int offset, unsigned int ihl) { + struct netns_ipvs *ipvs; unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != 0) { @@ -754,6 +757,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, if (!skb_make_writable(skb, offset)) goto out; + ipvs = net_ipvs(skb_net(skb)); + #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ip_vs_nat_icmp_v6(skb, pp, cp, 1); @@ -763,11 +768,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) goto out; } else #endif - if ((sysctl_ip_vs_snat_reroute || + if ((ipvs->sysctl_snat_reroute || skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) goto out; @@ -979,6 +984,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int ihl) { struct ip_vs_protocol *pp = pd->pp; + struct netns_ipvs *ipvs; IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); @@ -1014,13 +1020,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. */ + ipvs = net_ipvs(skb_net(skb)); + #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) goto drop; } else #endif - if ((sysctl_ip_vs_snat_reroute || + if ((ipvs->sysctl_snat_reroute || skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) goto drop; @@ -1057,6 +1065,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; + struct netns_ipvs *ipvs; EnterFunction(11); @@ -1131,10 +1140,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if the packet belongs to an existing entry */ cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); + ipvs = net_ipvs(net); if (likely(cp)) return handle_response(af, skb, pd, cp, iph.len); - if (sysctl_ip_vs_nat_icmp_send && + if (ipvs->sysctl_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { @@ -1580,7 +1590,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ - if (sysctl_ip_vs_expire_nodest_conn) { + if (ipvs->sysctl_expire_nodest_conn) { /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); } @@ -1610,15 +1620,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) - pkts = sysctl_ip_vs_sync_threshold[0]; + pkts = ipvs->sysctl_sync_threshold[0]; else pkts = atomic_add_return(1, &cp->in_pkts); if ((ipvs->sync_state & IP_VS_STATE_MASTER) && cp->protocol == IPPROTO_SCTP) { if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && - (pkts % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) || + (pkts % ipvs->sysctl_sync_threshold[1] + == ipvs->sysctl_sync_threshold[0])) || (cp->old_state != cp->state && ((cp->state == IP_VS_SCTP_S_CLOSED) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || @@ -1632,8 +1642,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && - (pkts % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) || + (pkts % ipvs->sysctl_sync_threshold[1] + == ipvs->sysctl_sync_threshold[0])) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE) || diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index cbd58c60e1bf..183ac18bded5 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -58,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex); /* lock for service table */ static DEFINE_RWLOCK(__ip_vs_svc_lock); -/* lock for table with the real services */ -static DEFINE_RWLOCK(__ip_vs_rs_lock); - -/* lock for state and timeout tables */ -static DEFINE_SPINLOCK(ip_vs_securetcp_lock); - -/* lock for drop entry handling */ -static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); - -/* lock for drop packet handling */ -static DEFINE_SPINLOCK(__ip_vs_droppacket_lock); - -/* 1/rate drop and drop-entry variables */ -int ip_vs_drop_rate = 0; -int ip_vs_drop_counter = 0; -static atomic_t ip_vs_dropentry = ATOMIC_INIT(0); - -/* number of virtual services */ -static int ip_vs_num_services = 0; - /* sysctl variables */ -static int sysctl_ip_vs_drop_entry = 0; -static int sysctl_ip_vs_drop_packet = 0; -static int sysctl_ip_vs_secure_tcp = 0; -static int sysctl_ip_vs_amemthresh = 1024; -static int sysctl_ip_vs_am_droprate = 10; -int sysctl_ip_vs_cache_bypass = 0; -int sysctl_ip_vs_expire_nodest_conn = 0; -int sysctl_ip_vs_expire_quiescent_template = 0; -int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; -int sysctl_ip_vs_nat_icmp_send = 0; -#ifdef CONFIG_IP_VS_NFCT -int sysctl_ip_vs_conntrack; -#endif -int sysctl_ip_vs_snat_reroute = 1; -int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */ #ifdef CONFIG_IP_VS_DEBUG static int sysctl_ip_vs_debug_level = 0; @@ -142,73 +107,73 @@ static void update_defense_level(struct netns_ipvs *ipvs) /* si_swapinfo(&i); */ /* availmem = availmem - (i.totalswap - i.freeswap); */ - nomem = (availmem < sysctl_ip_vs_amemthresh); + nomem = (availmem < ipvs->sysctl_amemthresh); local_bh_disable(); /* drop_entry */ - spin_lock(&__ip_vs_dropentry_lock); - switch (sysctl_ip_vs_drop_entry) { + spin_lock(&ipvs->dropentry_lock); + switch (ipvs->sysctl_drop_entry) { case 0: - atomic_set(&ip_vs_dropentry, 0); + atomic_set(&ipvs->dropentry, 0); break; case 1: if (nomem) { - atomic_set(&ip_vs_dropentry, 1); - sysctl_ip_vs_drop_entry = 2; + atomic_set(&ipvs->dropentry, 1); + ipvs->sysctl_drop_entry = 2; } else { - atomic_set(&ip_vs_dropentry, 0); + atomic_set(&ipvs->dropentry, 0); } break; case 2: if (nomem) { - atomic_set(&ip_vs_dropentry, 1); + atomic_set(&ipvs->dropentry, 1); } else { - atomic_set(&ip_vs_dropentry, 0); - sysctl_ip_vs_drop_entry = 1; + atomic_set(&ipvs->dropentry, 0); + ipvs->sysctl_drop_entry = 1; }; break; case 3: - atomic_set(&ip_vs_dropentry, 1); + atomic_set(&ipvs->dropentry, 1); break; } - spin_unlock(&__ip_vs_dropentry_lock); + spin_unlock(&ipvs->dropentry_lock); /* drop_packet */ - spin_lock(&__ip_vs_droppacket_lock); - switch (sysctl_ip_vs_drop_packet) { + spin_lock(&ipvs->droppacket_lock); + switch (ipvs->sysctl_drop_packet) { case 0: - ip_vs_drop_rate = 0; + ipvs->drop_rate = 0; break; case 1: if (nomem) { - ip_vs_drop_rate = ip_vs_drop_counter - = sysctl_ip_vs_amemthresh / - (sysctl_ip_vs_amemthresh-availmem); - sysctl_ip_vs_drop_packet = 2; + ipvs->drop_rate = ipvs->drop_counter + = ipvs->sysctl_amemthresh / + (ipvs->sysctl_amemthresh-availmem); + ipvs->sysctl_drop_packet = 2; } else { - ip_vs_drop_rate = 0; + ipvs->drop_rate = 0; } break; case 2: if (nomem) { - ip_vs_drop_rate = ip_vs_drop_counter - = sysctl_ip_vs_amemthresh / - (sysctl_ip_vs_amemthresh-availmem); + ipvs->drop_rate = ipvs->drop_counter + = ipvs->sysctl_amemthresh / + (ipvs->sysctl_amemthresh-availmem); } else { - ip_vs_drop_rate = 0; - sysctl_ip_vs_drop_packet = 1; + ipvs->drop_rate = 0; + ipvs->sysctl_drop_packet = 1; } break; case 3: - ip_vs_drop_rate = sysctl_ip_vs_am_droprate; + ipvs->drop_rate = ipvs->sysctl_am_droprate; break; } - spin_unlock(&__ip_vs_droppacket_lock); + spin_unlock(&ipvs->droppacket_lock); /* secure_tcp */ - spin_lock(&ip_vs_securetcp_lock); - switch (sysctl_ip_vs_secure_tcp) { + spin_lock(&ipvs->securetcp_lock); + switch (ipvs->sysctl_secure_tcp) { case 0: if (old_secure_tcp >= 2) to_change = 0; @@ -217,7 +182,7 @@ static void update_defense_level(struct netns_ipvs *ipvs) if (nomem) { if (old_secure_tcp < 2) to_change = 1; - sysctl_ip_vs_secure_tcp = 2; + ipvs->sysctl_secure_tcp = 2; } else { if (old_secure_tcp >= 2) to_change = 0; @@ -230,7 +195,7 @@ static void update_defense_level(struct netns_ipvs *ipvs) } else { if (old_secure_tcp >= 2) to_change = 0; - sysctl_ip_vs_secure_tcp = 1; + ipvs->sysctl_secure_tcp = 1; } break; case 3: @@ -238,11 +203,11 @@ static void update_defense_level(struct netns_ipvs *ipvs) to_change = 1; break; } - old_secure_tcp = sysctl_ip_vs_secure_tcp; + old_secure_tcp = ipvs->sysctl_secure_tcp; if (to_change >= 0) ip_vs_protocol_timeout_change(ipvs, - sysctl_ip_vs_secure_tcp > 1); - spin_unlock(&ip_vs_securetcp_lock); + ipvs->sysctl_secure_tcp > 1); + spin_unlock(&ipvs->securetcp_lock); local_bh_enable(); } @@ -260,7 +225,7 @@ static void defense_work_handler(struct work_struct *work) struct netns_ipvs *ipvs = net_ipvs(&init_net); update_defense_level(ipvs); - if (atomic_read(&ip_vs_dropentry)) + if (atomic_read(&ipvs->dropentry)) ip_vs_random_dropentry(); schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); @@ -602,7 +567,7 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, */ hash = ip_vs_rs_hashkey(af, daddr, dport); - read_lock(&__ip_vs_rs_lock); + read_lock(&ipvs->rs_lock); list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { if ((dest->af == af) && ip_vs_addr_equal(af, &dest->addr, daddr) @@ -610,11 +575,11 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, && ((dest->protocol == protocol) || dest->vfwmark)) { /* HIT */ - read_unlock(&__ip_vs_rs_lock); + read_unlock(&ipvs->rs_lock); return dest; } } - read_unlock(&__ip_vs_rs_lock); + read_unlock(&ipvs->rs_lock); return NULL; } @@ -788,9 +753,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, * Put the real service in rs_table if not present. * For now only for NAT! */ - write_lock_bh(&__ip_vs_rs_lock); + write_lock_bh(&ipvs->rs_lock); ip_vs_rs_hash(ipvs, dest); - write_unlock_bh(&__ip_vs_rs_lock); + write_unlock_bh(&ipvs->rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -1022,14 +987,16 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) */ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) { + struct netns_ipvs *ipvs = net_ipvs(net); + ip_vs_kill_estimator(net, &dest->stats); /* * Remove it from the d-linked list with the real services. */ - write_lock_bh(&__ip_vs_rs_lock); + write_lock_bh(&ipvs->rs_lock); ip_vs_rs_unhash(dest); - write_unlock_bh(&__ip_vs_rs_lock); + write_unlock_bh(&ipvs->rs_lock); /* * Decrease the refcnt of the dest, and free the dest @@ -1092,7 +1059,6 @@ static int ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - struct net *net = svc->net; __be16 dport = udest->port; EnterFunction(2); @@ -1121,7 +1087,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete the destination */ - __ip_vs_del_dest(net, dest); + __ip_vs_del_dest(svc->net, dest); LeaveFunction(2); @@ -1140,6 +1106,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; + struct netns_ipvs *ipvs = net_ipvs(net); /* increase the module use count */ ip_vs_use_count_inc(); @@ -1219,7 +1186,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) - ip_vs_num_services++; + ipvs->num_services++; /* Hash the service into the service table */ write_lock_bh(&__ip_vs_svc_lock); @@ -1359,12 +1326,13 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; struct ip_vs_pe *old_pe; + struct netns_ipvs *ipvs = net_ipvs(svc->net); pr_info("%s: enter\n", __func__); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) - ip_vs_num_services--; + ipvs->num_services--; ip_vs_kill_estimator(svc->net, &svc->stats); @@ -1589,42 +1557,31 @@ proc_do_sync_mode(ctl_table *table, int write, /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) + * Do not change order or insert new entries without + * align with netns init in __ip_vs_control_init() */ static struct ctl_table vs_vars[] = { { .procname = "amemthresh", - .data = &sysctl_ip_vs_amemthresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#ifdef CONFIG_IP_VS_DEBUG - { - .procname = "debug_level", - .data = &sysctl_ip_vs_debug_level, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, -#endif { .procname = "am_droprate", - .data = &sysctl_ip_vs_am_droprate, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "drop_entry", - .data = &sysctl_ip_vs_drop_entry, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, }, { .procname = "drop_packet", - .data = &sysctl_ip_vs_drop_packet, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, @@ -1632,7 +1589,6 @@ static struct ctl_table vs_vars[] = { #ifdef CONFIG_IP_VS_NFCT { .procname = "conntrack", - .data = &sysctl_ip_vs_conntrack, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -1640,25 +1596,62 @@ static struct ctl_table vs_vars[] = { #endif { .procname = "secure_tcp", - .data = &sysctl_ip_vs_secure_tcp, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, }, { .procname = "snat_reroute", - .data = &sysctl_ip_vs_snat_reroute, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, }, { .procname = "sync_version", - .data = &sysctl_ip_vs_sync_ver, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_do_sync_mode, }, + { + .procname = "cache_bypass", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "expire_nodest_conn", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "expire_quiescent_template", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sync_threshold", + .maxlen = + sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold), + .mode = 0644, + .proc_handler = proc_do_sync_threshold, + }, + { + .procname = "nat_icmp_send", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_IP_VS_DEBUG + { + .procname = "debug_level", + .data = &sysctl_ip_vs_debug_level, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #if 0 { .procname = "timeout_established", @@ -1745,41 +1738,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = proc_dointvec_jiffies, }, #endif - { - .procname = "cache_bypass", - .data = &sysctl_ip_vs_cache_bypass, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "expire_nodest_conn", - .data = &sysctl_ip_vs_expire_nodest_conn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "expire_quiescent_template", - .data = &sysctl_ip_vs_expire_quiescent_template, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "sync_threshold", - .data = &sysctl_ip_vs_sync_threshold, - .maxlen = sizeof(sysctl_ip_vs_sync_threshold), - .mode = 0644, - .proc_handler = proc_do_sync_threshold, - }, - { - .procname = "nat_icmp_send", - .data = &sysctl_ip_vs_nat_icmp_send, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { } }; @@ -1791,8 +1749,6 @@ const struct ctl_path net_vs_ctl_path[] = { }; EXPORT_SYMBOL_GPL(net_vs_ctl_path); -static struct ctl_table_header * sysctl_header; - #ifdef CONFIG_PROC_FS struct ip_vs_iter { @@ -2543,7 +2499,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) struct ip_vs_getinfo info; info.version = IP_VS_VERSION_CODE; info.size = ip_vs_conn_tab_size; - info.num_services = ip_vs_num_services; + info.num_services = ipvs->num_services; if (copy_to_user(user, &info, sizeof(info)) != 0) ret = -EFAULT; } @@ -3014,7 +2970,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct ip_vs_service *svc; struct ip_vs_dest *dest; struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; - struct net *net; + struct net *net = skb_sknet(skb); mutex_lock(&__ip_vs_mutex); @@ -3023,7 +2979,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) goto out_err; - net = skb_sknet(skb); + svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc) || svc == NULL) goto out_err; @@ -3215,8 +3171,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; struct net *net; + struct netns_ipvs *ipvs; net = skb_sknet(skb); + ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; mutex_lock(&__ip_vs_mutex); @@ -3326,8 +3284,10 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) void *reply; int ret, cmd, reply_cmd; struct net *net; + struct netns_ipvs *ipvs; net = skb_sknet(skb); + ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_GET_SERVICE) @@ -3530,9 +3490,21 @@ int __net_init __ip_vs_control_init(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); + struct ctl_table *tbl; if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + + atomic_set(&ipvs->dropentry, 0); + spin_lock_init(&ipvs->dropentry_lock); + spin_lock_init(&ipvs->droppacket_lock); + spin_lock_init(&ipvs->securetcp_lock); + ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); + + /* Initialize rs_table */ + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) + INIT_LIST_HEAD(&ipvs->rs_table[idx]); + /* procfs stats */ ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); if (ipvs->tot_stats == NULL) { @@ -3553,14 +3525,51 @@ int __net_init __ip_vs_control_init(struct net *net) proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); proc_net_fops_create(net, "ip_vs_stats_percpu", 0, &ip_vs_stats_percpu_fops); - sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, + + if (!net_eq(net, &init_net)) { + tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); + if (tbl == NULL) + goto err_dup; + } else + tbl = vs_vars; + /* Initialize sysctl defaults */ + idx = 0; + ipvs->sysctl_amemthresh = 1024; + tbl[idx++].data = &ipvs->sysctl_amemthresh; + ipvs->sysctl_am_droprate = 10; + tbl[idx++].data = &ipvs->sysctl_am_droprate; + tbl[idx++].data = &ipvs->sysctl_drop_entry; + tbl[idx++].data = &ipvs->sysctl_drop_packet; +#ifdef CONFIG_IP_VS_NFCT + tbl[idx++].data = &ipvs->sysctl_conntrack; +#endif + tbl[idx++].data = &ipvs->sysctl_secure_tcp; + ipvs->sysctl_snat_reroute = 1; + tbl[idx++].data = &ipvs->sysctl_snat_reroute; + ipvs->sysctl_sync_ver = 1; + tbl[idx++].data = &ipvs->sysctl_sync_ver; + tbl[idx++].data = &ipvs->sysctl_cache_bypass; + tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; + tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; + ipvs->sysctl_sync_threshold[0] = 3; + ipvs->sysctl_sync_threshold[1] = 50; + tbl[idx].data = &ipvs->sysctl_sync_threshold; + tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); + tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; + + + ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, vs_vars); - if (sysctl_header == NULL) + if (ipvs->sysctl_hdr == NULL) goto err_reg; ip_vs_new_estimator(net, ipvs->tot_stats); + ipvs->sysctl_tbl = tbl; return 0; err_reg: + if (!net_eq(net, &init_net)) + kfree(tbl); +err_dup: free_percpu(ipvs->cpustats); err_alloc: kfree(ipvs->tot_stats); @@ -3575,7 +3584,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) return; ip_vs_kill_estimator(net, ipvs->tot_stats); - unregister_net_sysctl_table(sysctl_header); + unregister_net_sysctl_table(ipvs->sysctl_hdr); proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 550365a690c7..fb2d04ac5d4e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -34,7 +34,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, &iph.daddr, sh->dest))) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. * We have to drop this packet :( diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index d8b3f9f15826..c0cc341b840d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -54,7 +54,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, &iph.daddr, th->dest))) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. * We have to drop this packet :( diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 581157bbded5..f1282cbe6fe3 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -50,7 +50,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (svc) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. * We have to drop this packet :( diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f85e47daecc3..b1780562c42b 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode) if (!ipvs->sync_state & IP_VS_STATE_MASTER) return; - if (mode == sysctl_ip_vs_sync_ver || !ipvs->sync_buff) + if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) return; spin_lock_bh(&ipvs->sync_buff_lock); @@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) unsigned int len, pe_name_len, pad; /* Handle old version of the protocol */ - if (sysctl_ip_vs_sync_ver == 0) { + if (ipvs->sysctl_sync_ver == 0) { ip_vs_sync_conn_v0(net, cp); return; } @@ -650,7 +650,7 @@ control: if (cp->flags & IP_VS_CONN_F_TEMPLATE) { int pkts = atomic_add_return(1, &cp->in_pkts); - if (pkts % sysctl_ip_vs_sync_threshold[1] != 1) + if (pkts % ipvs->sysctl_sync_threshold[1] != 1) return; } goto sloop; @@ -724,6 +724,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, { struct ip_vs_dest *dest; struct ip_vs_conn *cp; + struct netns_ipvs *ipvs = net_ipvs(net); if (!(flags & IP_VS_CONN_F_TEMPLATE)) cp = ip_vs_conn_in_get(param); @@ -794,7 +795,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, if (opt) memcpy(&cp->in_seq, opt, sizeof(*opt)); - atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); + atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]); cp->state = state; cp->old_state = cp->state; /* -- cgit v1.2.3-59-g8ed1b